1 Figure 1

library(data.table)
library(ggplot2)
library(cowplot)
library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.7     ✔ dplyr   1.0.7
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()   masks data.table::between()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::first()     masks data.table::first()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::last()      masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(webchem)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-2
library(fastcluster)
## 
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
## 
##     hclust
library(RColorBrewer)
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.13.1
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(ggrepel)
library(patchwork)
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
## 
##     align_plots
library(KEGGREST)

theme_set(theme_classic2())
datadir <- ""
outdir <- "figure1/"
dir.create(outdir)

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] KEGGREST_1.32.0       patchwork_1.1.1       ggrepel_0.9.1        
##  [4] ComplexHeatmap_2.13.1 ggpubr_0.4.0          RColorBrewer_1.1-2   
##  [7] fastcluster_1.2.3     vegan_2.6-2           lattice_0.20-45      
## [10] permute_0.9-5         webchem_1.1.2         forcats_0.5.1        
## [13] stringr_1.4.0         dplyr_1.0.7           purrr_0.3.4          
## [16] readr_2.1.2           tidyr_1.2.0           tibble_3.1.7         
## [19] tidyverse_1.3.1       readxl_1.3.1          cowplot_1.1.1        
## [22] ggplot2_3.3.6         data.table_1.14.2    
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-3       ggsignif_0.6.3         rjson_0.2.21          
##  [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.34.0        
##  [7] GlobalOptions_0.1.2    fs_1.5.0               clue_0.3-61           
## [10] rstudioapi_0.13        fansi_1.0.3            lubridate_1.8.0       
## [13] xml2_1.3.2             codetools_0.2-18       splines_4.1.1         
## [16] doParallel_1.0.17      knitr_1.39             jsonlite_1.8.0        
## [19] broom_0.8.0            cluster_2.1.3          dbplyr_2.1.1          
## [22] png_0.1-7              data.tree_1.0.0        compiler_4.1.1        
## [25] httr_1.4.2             backports_1.2.1        assertthat_0.2.1      
## [28] Matrix_1.4-1           fastmap_1.1.0          cli_3.3.0             
## [31] htmltools_0.5.2        tools_4.1.1            gtable_0.3.0          
## [34] glue_1.6.2             GenomeInfoDbData_1.2.7 Rcpp_1.0.8.3          
## [37] carData_3.0-5          cellranger_1.1.0       jquerylib_0.1.4       
## [40] vctrs_0.4.1            Biostrings_2.62.0      nlme_3.1-159          
## [43] iterators_1.0.13       xfun_0.31              rvest_1.0.2           
## [46] lifecycle_1.0.0        rstatix_0.7.0          zlibbioc_1.40.0       
## [49] MASS_7.3-57            scales_1.1.1           hms_1.1.1             
## [52] parallel_4.1.1         yaml_2.3.5             sass_0.4.1            
## [55] stringi_1.7.6          S4Vectors_0.32.4       foreach_1.5.2         
## [58] BiocGenerics_0.40.0    shape_1.4.6            GenomeInfoDb_1.30.1   
## [61] rlang_1.0.2            pkgconfig_2.0.3        matrixStats_0.62.0    
## [64] bitops_1.0-7           evaluate_0.15          tidyselect_1.1.2      
## [67] magrittr_2.0.3         R6_2.5.1               IRanges_2.28.0        
## [70] generics_0.1.0         DBI_1.1.1              pillar_1.7.0          
## [73] haven_2.5.0            withr_2.5.0            mgcv_1.8-40           
## [76] abind_1.4-5            RCurl_1.98-1.7         modelr_0.1.8          
## [79] crayon_1.4.1           car_3.0-13             utf8_1.2.2            
## [82] tzdb_0.3.0             rmarkdown_2.14         GetoptLong_1.0.5      
## [85] reprex_2.0.1           digest_0.6.29          stats4_4.1.1          
## [88] munsell_0.5.0          bslib_0.3.1
####### Read in processed datasets
processed_data <- fread("processedDatasets/timecourse_all_data.csv")
summary_data <- fread("processedDatasets/timecourse_mean_summaries.csv")
#growth_file<-paste0(datadir, "TimeCourse_take2_2020-8-17/ODdata.xlsx")

#### Add in OD/growth data
growth_file <- "../TimeCourse_take2_2020-8-17/ODdata.xlsx"
growth_data <- read_xlsx(growth_file, sheet = 1)
time_pts <- read_xlsx(growth_file, sheet = 2)
time_pts <- data.table(time_pts %>% mutate(TimePoint = as.numeric(gsub("TP", "", TimePoint))))
growth_data <- data.table(growth_data) %>% melt(id.var = c("Strain", "AcConc", "Replicate"))
growth_data[,Time:=as.numeric(gsub(".*_", "", variable))]
growth2 <- growth_data
setnames(growth2, "value", "OD600")
growth2[Time==41.5, Time:=42]
growth2[Time==26.5, Time:=26]
growth2[Time==30.5, Time:=30]
mean_growth2 <- growth2[, .(meanOD =mean(OD600), sdOD = sd(OD600)), by=list(Strain, AcConc, Time)]


############# Growth data plots

growth_data_sub <- growth2[Strain == "2243" & AcConc==1, list(mean(OD600), sd(OD600)), by = Time]
growth_data_sub <- rbind(growth_data_sub, data.table(Time = 0, V1 = 0.01), fill = T)

growth2_sub <- growth2[Strain == "2243" & AcConc==1]
growth2_sub <- rbind(growth2_sub, data.table(Time = rep(0, 3), OD600 = rep(0.01, 3), Replicate = 1:3), fill = T)

growth_plot_heatmap <- ggplot(growth2_sub[,mean(OD600), by=Time], aes(x = factor(Time), y = 1, fill = V1)) + geom_tile(color = "black") + xlab("Time (hr)") + 
  theme_minimal() + scale_fill_gradient(low = "white", high = "gray20", name = "E. lenta DSM 2243 abundance\n(mean normalized OD600)") + theme(axis.text.y = element_blank(), 
                                                                                                                                               axis.title.y = element_blank())

ggsave(growth_plot_heatmap, file = paste0(outdir, "growth_plot_timecourse_heatmap.pdf"), width = 5, height = 1.2)

growth_plot_heatmap

summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" , table(CF_superclass)]
## CF_superclass
##                                           
##                                         2 
##                 Alkaloids and derivatives 
##                                         6 
## Lignans, neolignans and related compounds 
##                                         3 
##           Lipids and lipid-like molecules 
##                                         1 
##                                no matches 
##                                      3874 
##   Nucleosides, nucleotides, and analogues 
##                                        43 
##             Organic acids and derivatives 
##                                       221 
##                Organic nitrogen compounds 
##                                         2 
##                  Organic oxygen compounds 
##                                         4 
##              Organoheterocyclic compounds 
##                                        91 
##          Phenylpropanoids and polyketides 
##                                         2
summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" , table(MSI)]
## MSI
##         1  1,4    2  2,4    3    4 
## 4090   67    4   35    2   42    9
#### Add Other bucket to superclass annotation
class_counts <- summary_data[TimePoint==6 & AcConc==1 & Strain == "2243", length(unique(IDUniq)), by = CF_superclass][order(V1, decreasing = T)]
summary_data[,CF_superclass2:=ifelse(CF_superclass %in% class_counts[V1 < 5, CF_superclass], "Other", CF_superclass)]

#### Volcano plot
volcano_plot_gnps <- ggplot(summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" & CF_superclass == "no matches"], aes(x = log2FC_adj, y = -1*log10(p.value))) +
  geom_vline(xintercept = 1, linetype = 2) + geom_vline(xintercept = -1, linetype = 2) +
  geom_hline(yintercept = 0.6, linetype = 2) +
  geom_point(shape = 21, fill = "gray30", alpha = 0.8, size= 1.2) + geom_point(data = summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" & CF_superclass != "no matches"], aes(fill = CF_superclass2), shape = 21, size = 1.8) +
  scale_fill_manual(values = c(brewer.pal(12, "Set3")[c(1:2,4:8, 10:12)]), name = "Superclass") + 
  theme(legend.position = "bottom") + guides(fill = guide_legend(ncol = 1)) + 
  xlab("log2 fold change E. lenta DSM 2243 \nvs. sterile controls at 50hr")

ggsave(volcano_plot_gnps, file = paste0(outdir, "volcano_plot_gnps.pdf"), width = 4, height = 5)

volcano_plot_gnps

#### Summary stats
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(FDRCorrect < 0.1 )]
## 
## FALSE  TRUE 
##  3483   612
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243" & FDRCorrect < 0.1, table(log2FC > 0)]
## 
## FALSE  TRUE 
##   168   444
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243" & FDRCorrect < 0.1 & log2FC < 0 & MSI != ""]
##    IonMode AlignmentID                   IDUniq Strain AcConc TimePoint
## 1:     Pos        1069 146.09215_7.621_1069_Pos   2243      1         6
## 2:     Pos        1158 152.05757_6.515_1158_Pos   2243      1         6
## 3:     Pos        1543 175.11842_9.472_1543_Pos   2243      1         6
## 4:     Pos        2624  272.17151_9.39_2624_Pos   2243      1         6
## 5:     Pos        2635 274.18716_9.265_2635_Pos   2243      1         6
## 6:     Pos        2755  293.09756_9.69_2755_Pos   2243      1         6
## 7:     Pos        2898 322.18713_8.665_2898_Pos   2243      1         6
## 8:     Pos        3122  377.14465_6.11_3122_Pos   2243      1         6
##       AvgMZ AvgRt Adduct MSI                         MetName    Formula
## 1: 146.0922 7.621 [M+H]+   1         4-Guanidinobutyric acid       null
## 2: 152.0576 6.515 [M+H]+   1                         Guanine       null
## 3: 175.1184 9.472 [M+H]+   1                        Arginine       null
## 4: 272.1715 9.390 [M+H]+   3                         Pro-Arg C11H21N5O3
## 5: 274.1872 9.265 [M+H]+   3                         Val-Arg C11H23N5O3
## 6: 293.0976 9.690 [M+H]+   3 Ethylenediaminetetraacetic acid C10H16N2O8
## 7: 322.1871 8.665 [M+H]+   3                         Arg-Phe C15H23N5O3
## 8: 377.1447 6.110 [M+H]+   1                      Riboflavin       null
##                       INCHIKEY
## 1: TUHVEAJXIMEOSA-UHFFFAOYSA-N
## 2: UYTPUPDQBNUYGX-UHFFFAOYSA-N
## 3: ODKSFYDXXFIFQN-BYPYZUCNSA-N
## 4: HMNSRTLZAJHSIK-YUMQZZPRSA-N
## 5: IBIDRSSEHFLGSD-YUMQZZPRSA-N
## 6: KCXVZYZYPLLWCC-UHFFFAOYSA-N
## 7: PQBHGSGQZSOLIR-RYUDHWBXSA-N
## 8: AUNGANRZJHBGPY-SCRDCRAPSA-N
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                MS/MS spectrum
## 1:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          58.06606:72922 60.05321:66028 60.0569:599196 64.89349:41553 69.03465:55517 86.05511:114973 86.06149:1571713 87.03905:158058 87.04552:2257787 88.04843:97862 100.02277:85739 100.11342:78052 104.07082:613889 111.05553:232843 128.08371:426668 129.06721:102166 131.96037:44959 146.09225:3108444
## 2:                                                                                                                                                                                                                                                                                                                                                                                                                      51.32462:2113 56.05073:3474 58.06278:4488 58.06643:23230 59.07427:28091 64.85785:2362 89.08133:2599 106.04996:4090 110.06185:3784 110.07248:3325 124.07721:3926 128.04654:4485 134.06007:20088 135.03186:5714 135.39624:2446 136.07767:3755 151.06358:7428 151.08888:10206 151.12454:3074 152.03471:5793 152.04799:61400 152.05885:237983 152.08395:5470 152.09494:7853 152.10806:4838 152.12015:4354
## 3:                                                                                                                                                                                                                                                                                                                                                                                                                                                                            60.05695:63174348 70.06654:80183040 71.06964:1397201 72.08225:5456357 75.7781:1277058 97.0774:1548582 112.08871:6907542 114.10384:1522407 115.08826:1615821 116.07243:63837528 117.07515:1738751 130.09753:24050896 131.1311:970662 153.58105:837189 157.10864:4184034 158.09486:21616900 159.09869:914629 175.09473:2739370 175.11891:96326112
## 4:                                                                                                                                                                                                                                            50.69818:4404 60.05609:77404 70.06555:492721 71.06878:10967 72.08119:6452 96.16372:4730 98.06012:5392 99.09158:7022 107.7388:5017 112.08691:77599 113.07125:8681 114.10253:12101 115.08661:62883 116.07067:78051 118.41836:5492 129.71727:4949 130.09721:26590 133.09711:8301 140.08176:97906 143.08119:6815 157.10837:27301 158.09212:113444 175.11874:970866 176.12241:35772 185.10306:40925 194.12886:15717 195.11305:25603 210.12369:11867 212.13937:19653 213.12263:27584 230.1483:10720 254.16022:61830 254.24603:6216 255.14436:134534 256.14786:11544 272.17126:1426461
## 5:                                                                                                                    55.0547:15463 60.05608:102951 66.50826:4355 67.29038:4059 70.06551:124004 72.08118:465248 73.08464:14998 88.07607:10784 97.07624:5000 106.08628:7751 112.08704:41433 113.07136:8966 114.10278:22531 115.08649:58367 116.0705:101542 116.07899:8226 130.0974:30870 133.09721:11452 142.09738:175608 143.0811:5341 157.1086:20760 158.09216:176379 158.10696:9209 159.09474:6943 169.13359:8064 175.11868:862378 176.12187:29410 185.10313:17329 197.1277:6440 203.02045:5161 211.15466:6815 212.13954:6743 214.15524:21786 215.13882:34314 230.19704:6247 232.16603:5204 239.14957:60627 240.13196:7829 256.17685:21141 256.26334:8640 257.16022:148428 258.1636:12321 274.18695:1731266 274.2738:160767
## 6:                                                                                                                                                                                                                                                                                                                                                   56.04991:5069 60.04483:13243 71.19521:2821 74.06039:26183 86.06026:50609 88.03956:59341 88.09402:3001 102.0549:22071 104.07059:8826 114.04634:6071 114.05495:82077 114.06402:4340 115.0585:2801 118.08605:12960 132.06537:593407 133.069:18803 134.04456:9357 136.0717:2919 149.02286:6593 160.06017:1124401 161.06354:42334 163.80119:2754 175.11909:5712 195.02496:3848 247.09146:31492 270.28195:3253 270.89111:43685 288.90121:14245 292.13159:4401 293.09741:141391
## 7: 53.71065:3178 60.05614:156553 61.47846:2797 64.54952:2682 70.06559:542410 71.0495:8247 71.06894:10802 72.08135:4733 81.07046:3948 83.0856:3533 87.09188:4947 88.07629:7913 97.07616:13664 97.10152:3768 98.06018:12706 104.89079:3000 112.08708:113797 113.07162:3062 113.09048:4418 115.08681:28664 116.07069:30090 120.08089:62017 121.08438:3996 130.09737:11300 140.08163:29556 148.67773:3283 157.10822:48823 158.09244:43480 166.08607:12178 175.11884:327435 176.12074:10676 190.09668:21014 200.10699:8684 205.19719:2935 217.13278:5985 238.58571:3152 242.12854:11398 245.12872:5465 246.11201:19731 259.15604:19132 262.15506:53502 263.13818:69244 264.14264:6860 287.14981:3966 288.13364:26701 289.13629:4303 304.17535:5316 305.16046:204370 306.16229:20619 321.19998:3200 321.31458:7245 322.18716:917216
## 8:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     55.63405:2420 70.11493:2221 71.05062:3480 71.79011:2389 75.04574:2884 91.69588:2441 99.04575:4876 106.44118:2846 116.24726:2947 116.84214:2390 172.08841:2806 213.25253:3002 223.85902:2529 231.76869:2711 237.5387:3531 243.08759:58808 244.09593:3014 377.09677:5826 377.14499:77042
##              BK Time MinFeatValue    meanValue      sdValue   minValue
## 1:    76339.444   50       323322   88177336.0  10629261.05   80670080
## 2:           NA   50       109602     457388.3     24303.33     430812
## 3: 28955131.847   50     19862148 2159625941.3 232124477.95 1947554816
## 4:           NA   50       330950    1194353.7     80408.71    1102820
## 5:     1628.953   50         6011      27667.0     14420.65      11415
## 6:        0.000   50         1661          0.0         0.00          0
## 7:        0.000   50        25579          0.0         0.00          0
## 8:        0.000   50         9857          0.0         0.00          0
##      maxValue medianValue meanLog10Value sdLog10Value minLog10Value
## 1:  100340008    83521920       7.943731   0.05087158      7.907147
## 2:     478482      462871       5.685184   0.02196743      5.661067
## 3: 2407616256  2123706752       9.333729   0.04620032      9.290596
## 4:    1253602     1226639       6.105635   0.02781781      6.073923
## 5:      38933       32653       4.417139   0.26748493      4.111187
## 6:          0           0       2.618310   0.00000000      2.618310
## 7:          0           0       3.805824   0.00000000      3.805824
## 8:          0           0       3.391685   0.00000000      3.391685
##    maxLog10Value medianLog10Value ctrl_meanValue ctrl_sdValue       log2FC
## 1:      8.001824         7.922221   8.837948e+07 6.727066e+07 -0.003300611
## 2:      5.704050         5.690437   7.367430e+06 5.671251e+06 -3.931088706
## 3:      9.382482         9.328109   2.722777e+09 1.594957e+09 -0.333612641
## 4:      6.125917         6.117065   1.211139e+06 9.127339e+05 -0.018838030
## 5:      4.606766         4.533464   2.978967e+04 2.402780e+04 -0.101340058
## 6:      2.618310         2.618310   2.870047e+05 2.885599e+05 -9.434964153
## 7:      3.805824         3.805824   2.694659e+06 2.044094e+06 -8.722418853
## 8:      3.391685         3.391685   5.442400e+04 5.577482e+04 -4.528910170
##    ctrlMeanTP1To4 ctrlSDTP1To4  log2FC_adj   estimate estimate1 estimate2
## 1:    144402854.0   20678641.0  -0.7111049 -0.2433690  7.943731  8.187100
## 2:     10977001.8     552236.8  -4.5045805 -1.3605009  5.685184  7.045685
## 3:   3711423680.0  220317999.1  -0.7798068 -0.2305154  9.333729  9.564244
## 4:      3468554.4     251964.2  -1.4754824 -0.4433025  6.105635  6.548937
## 5:      3704056.4     267599.9  -6.9890744 -2.1379656  4.417139  6.555104
## 6:      1201953.8     352478.9 -11.4996120 -3.3953950  2.618310  6.013705
## 7:      4227198.2     221295.1  -9.3707789 -2.8272269  3.805824  6.633050
## 8:       217209.9      29914.0  -6.4780728 -1.9021723  3.391685  5.293857
##      statistic      p.value   FDRCorrect
## 1:   -4.892989 1.008098e-02 0.0780370841
## 2:  -74.415340 1.990020e-07 0.0004074567
## 3:   -8.394682 9.808226e-03 0.0765041646
## 4:  -25.157460 2.292129e-04 0.0080224498
## 5:  -13.812490 5.016011e-03 0.0500989411
## 6:  -54.607742 3.351762e-04 0.0093499440
## 7: -263.875552 1.436125e-05 0.0024662376
## 8:  -74.924454 1.780889e-04 0.0073246647
##                                                                                                      Compound_Name
## 1:                                                                                                                
## 2:                                                                                                                
## 3:                                    Massbank:PB000419 Arginine|2-amino-5-(diaminomethylideneamino)pentanoic acid
## 4:                                                                           Spectral Match to Pro-Arg from NIST14
## 5:                                                                           Spectral Match to Val-Arg from NIST14
## 6:                                                                            Ethylenediaminetetraacetic acid EDTA
## 7:                                                                           Spectral Match to Arg-Phe from NIST14
## 8: Massbank:RP013102 Riboflavin|7,8-dimethyl-10-[(2S,3S,4R)-2,3,4,5-tetrahydroxypentyl]benzo[g]pteridine-2,4-dione
##           CF_kingdom                 CF_superclass
## 1: Organic compounds Organic acids and derivatives
## 2:        no matches                    no matches
## 3: Organic compounds Organic acids and derivatives
## 4: Organic compounds Organic acids and derivatives
## 5: Organic compounds Organic acids and derivatives
## 6: Organic compounds Organic acids and derivatives
## 7: Organic compounds Organic acids and derivatives
## 8: Organic compounds  Organoheterocyclic compounds
##                            CF_class                           CF_subclass
## 1: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 2:                       no matches                            no matches
## 3: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 4: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 5: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 6: Carboxylic acids and derivatives Tetracarboxylic acids and derivatives
## 7: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 8:       Pteridines and derivatives        Alloxazines and isoalloxazines
##                               CF_Dparent                CF_superclass2
## 1:            N-acyl-L-alpha-amino acids Organic acids and derivatives
## 2:                            no matches                    no matches
## 3:              Arginine and derivatives Organic acids and derivatives
## 4:                            Dipeptides Organic acids and derivatives
## 5:                            Dipeptides Organic acids and derivatives
## 6: Tetracarboxylic acids and derivatives Organic acids and derivatives
## 7:                            Dipeptides Organic acids and derivatives
## 8:                               Flavins  Organoheterocyclic compounds
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(MSI=="", FDRCorrect < 0.1)]
##        
##         FALSE TRUE
##   FALSE    95   38
##   TRUE   3388  574
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", fisher.test(table(MSI != "", FDRCorrect < 0.1))]
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(MSI != "", FDRCorrect < 0.1)
## p-value = 3.558e-05
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.559209 3.512528
## sample estimates:
## odds ratio 
##   2.360247
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243" & FDRCorrect < 0.1 & log2FC < 0 & MSI != ""]
##    IonMode AlignmentID                   IDUniq Strain AcConc TimePoint
## 1:     Pos        1069 146.09215_7.621_1069_Pos   2243      1         6
## 2:     Pos        1158 152.05757_6.515_1158_Pos   2243      1         6
## 3:     Pos        1543 175.11842_9.472_1543_Pos   2243      1         6
## 4:     Pos        2624  272.17151_9.39_2624_Pos   2243      1         6
## 5:     Pos        2635 274.18716_9.265_2635_Pos   2243      1         6
## 6:     Pos        2755  293.09756_9.69_2755_Pos   2243      1         6
## 7:     Pos        2898 322.18713_8.665_2898_Pos   2243      1         6
## 8:     Pos        3122  377.14465_6.11_3122_Pos   2243      1         6
##       AvgMZ AvgRt Adduct MSI                         MetName    Formula
## 1: 146.0922 7.621 [M+H]+   1         4-Guanidinobutyric acid       null
## 2: 152.0576 6.515 [M+H]+   1                         Guanine       null
## 3: 175.1184 9.472 [M+H]+   1                        Arginine       null
## 4: 272.1715 9.390 [M+H]+   3                         Pro-Arg C11H21N5O3
## 5: 274.1872 9.265 [M+H]+   3                         Val-Arg C11H23N5O3
## 6: 293.0976 9.690 [M+H]+   3 Ethylenediaminetetraacetic acid C10H16N2O8
## 7: 322.1871 8.665 [M+H]+   3                         Arg-Phe C15H23N5O3
## 8: 377.1447 6.110 [M+H]+   1                      Riboflavin       null
##                       INCHIKEY
## 1: TUHVEAJXIMEOSA-UHFFFAOYSA-N
## 2: UYTPUPDQBNUYGX-UHFFFAOYSA-N
## 3: ODKSFYDXXFIFQN-BYPYZUCNSA-N
## 4: HMNSRTLZAJHSIK-YUMQZZPRSA-N
## 5: IBIDRSSEHFLGSD-YUMQZZPRSA-N
## 6: KCXVZYZYPLLWCC-UHFFFAOYSA-N
## 7: PQBHGSGQZSOLIR-RYUDHWBXSA-N
## 8: AUNGANRZJHBGPY-SCRDCRAPSA-N
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                MS/MS spectrum
## 1:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          58.06606:72922 60.05321:66028 60.0569:599196 64.89349:41553 69.03465:55517 86.05511:114973 86.06149:1571713 87.03905:158058 87.04552:2257787 88.04843:97862 100.02277:85739 100.11342:78052 104.07082:613889 111.05553:232843 128.08371:426668 129.06721:102166 131.96037:44959 146.09225:3108444
## 2:                                                                                                                                                                                                                                                                                                                                                                                                                      51.32462:2113 56.05073:3474 58.06278:4488 58.06643:23230 59.07427:28091 64.85785:2362 89.08133:2599 106.04996:4090 110.06185:3784 110.07248:3325 124.07721:3926 128.04654:4485 134.06007:20088 135.03186:5714 135.39624:2446 136.07767:3755 151.06358:7428 151.08888:10206 151.12454:3074 152.03471:5793 152.04799:61400 152.05885:237983 152.08395:5470 152.09494:7853 152.10806:4838 152.12015:4354
## 3:                                                                                                                                                                                                                                                                                                                                                                                                                                                                            60.05695:63174348 70.06654:80183040 71.06964:1397201 72.08225:5456357 75.7781:1277058 97.0774:1548582 112.08871:6907542 114.10384:1522407 115.08826:1615821 116.07243:63837528 117.07515:1738751 130.09753:24050896 131.1311:970662 153.58105:837189 157.10864:4184034 158.09486:21616900 159.09869:914629 175.09473:2739370 175.11891:96326112
## 4:                                                                                                                                                                                                                                            50.69818:4404 60.05609:77404 70.06555:492721 71.06878:10967 72.08119:6452 96.16372:4730 98.06012:5392 99.09158:7022 107.7388:5017 112.08691:77599 113.07125:8681 114.10253:12101 115.08661:62883 116.07067:78051 118.41836:5492 129.71727:4949 130.09721:26590 133.09711:8301 140.08176:97906 143.08119:6815 157.10837:27301 158.09212:113444 175.11874:970866 176.12241:35772 185.10306:40925 194.12886:15717 195.11305:25603 210.12369:11867 212.13937:19653 213.12263:27584 230.1483:10720 254.16022:61830 254.24603:6216 255.14436:134534 256.14786:11544 272.17126:1426461
## 5:                                                                                                                    55.0547:15463 60.05608:102951 66.50826:4355 67.29038:4059 70.06551:124004 72.08118:465248 73.08464:14998 88.07607:10784 97.07624:5000 106.08628:7751 112.08704:41433 113.07136:8966 114.10278:22531 115.08649:58367 116.0705:101542 116.07899:8226 130.0974:30870 133.09721:11452 142.09738:175608 143.0811:5341 157.1086:20760 158.09216:176379 158.10696:9209 159.09474:6943 169.13359:8064 175.11868:862378 176.12187:29410 185.10313:17329 197.1277:6440 203.02045:5161 211.15466:6815 212.13954:6743 214.15524:21786 215.13882:34314 230.19704:6247 232.16603:5204 239.14957:60627 240.13196:7829 256.17685:21141 256.26334:8640 257.16022:148428 258.1636:12321 274.18695:1731266 274.2738:160767
## 6:                                                                                                                                                                                                                                                                                                                                                   56.04991:5069 60.04483:13243 71.19521:2821 74.06039:26183 86.06026:50609 88.03956:59341 88.09402:3001 102.0549:22071 104.07059:8826 114.04634:6071 114.05495:82077 114.06402:4340 115.0585:2801 118.08605:12960 132.06537:593407 133.069:18803 134.04456:9357 136.0717:2919 149.02286:6593 160.06017:1124401 161.06354:42334 163.80119:2754 175.11909:5712 195.02496:3848 247.09146:31492 270.28195:3253 270.89111:43685 288.90121:14245 292.13159:4401 293.09741:141391
## 7: 53.71065:3178 60.05614:156553 61.47846:2797 64.54952:2682 70.06559:542410 71.0495:8247 71.06894:10802 72.08135:4733 81.07046:3948 83.0856:3533 87.09188:4947 88.07629:7913 97.07616:13664 97.10152:3768 98.06018:12706 104.89079:3000 112.08708:113797 113.07162:3062 113.09048:4418 115.08681:28664 116.07069:30090 120.08089:62017 121.08438:3996 130.09737:11300 140.08163:29556 148.67773:3283 157.10822:48823 158.09244:43480 166.08607:12178 175.11884:327435 176.12074:10676 190.09668:21014 200.10699:8684 205.19719:2935 217.13278:5985 238.58571:3152 242.12854:11398 245.12872:5465 246.11201:19731 259.15604:19132 262.15506:53502 263.13818:69244 264.14264:6860 287.14981:3966 288.13364:26701 289.13629:4303 304.17535:5316 305.16046:204370 306.16229:20619 321.19998:3200 321.31458:7245 322.18716:917216
## 8:                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     55.63405:2420 70.11493:2221 71.05062:3480 71.79011:2389 75.04574:2884 91.69588:2441 99.04575:4876 106.44118:2846 116.24726:2947 116.84214:2390 172.08841:2806 213.25253:3002 223.85902:2529 231.76869:2711 237.5387:3531 243.08759:58808 244.09593:3014 377.09677:5826 377.14499:77042
##              BK Time MinFeatValue    meanValue      sdValue   minValue
## 1:    76339.444   50       323322   88177336.0  10629261.05   80670080
## 2:           NA   50       109602     457388.3     24303.33     430812
## 3: 28955131.847   50     19862148 2159625941.3 232124477.95 1947554816
## 4:           NA   50       330950    1194353.7     80408.71    1102820
## 5:     1628.953   50         6011      27667.0     14420.65      11415
## 6:        0.000   50         1661          0.0         0.00          0
## 7:        0.000   50        25579          0.0         0.00          0
## 8:        0.000   50         9857          0.0         0.00          0
##      maxValue medianValue meanLog10Value sdLog10Value minLog10Value
## 1:  100340008    83521920       7.943731   0.05087158      7.907147
## 2:     478482      462871       5.685184   0.02196743      5.661067
## 3: 2407616256  2123706752       9.333729   0.04620032      9.290596
## 4:    1253602     1226639       6.105635   0.02781781      6.073923
## 5:      38933       32653       4.417139   0.26748493      4.111187
## 6:          0           0       2.618310   0.00000000      2.618310
## 7:          0           0       3.805824   0.00000000      3.805824
## 8:          0           0       3.391685   0.00000000      3.391685
##    maxLog10Value medianLog10Value ctrl_meanValue ctrl_sdValue       log2FC
## 1:      8.001824         7.922221   8.837948e+07 6.727066e+07 -0.003300611
## 2:      5.704050         5.690437   7.367430e+06 5.671251e+06 -3.931088706
## 3:      9.382482         9.328109   2.722777e+09 1.594957e+09 -0.333612641
## 4:      6.125917         6.117065   1.211139e+06 9.127339e+05 -0.018838030
## 5:      4.606766         4.533464   2.978967e+04 2.402780e+04 -0.101340058
## 6:      2.618310         2.618310   2.870047e+05 2.885599e+05 -9.434964153
## 7:      3.805824         3.805824   2.694659e+06 2.044094e+06 -8.722418853
## 8:      3.391685         3.391685   5.442400e+04 5.577482e+04 -4.528910170
##    ctrlMeanTP1To4 ctrlSDTP1To4  log2FC_adj   estimate estimate1 estimate2
## 1:    144402854.0   20678641.0  -0.7111049 -0.2433690  7.943731  8.187100
## 2:     10977001.8     552236.8  -4.5045805 -1.3605009  5.685184  7.045685
## 3:   3711423680.0  220317999.1  -0.7798068 -0.2305154  9.333729  9.564244
## 4:      3468554.4     251964.2  -1.4754824 -0.4433025  6.105635  6.548937
## 5:      3704056.4     267599.9  -6.9890744 -2.1379656  4.417139  6.555104
## 6:      1201953.8     352478.9 -11.4996120 -3.3953950  2.618310  6.013705
## 7:      4227198.2     221295.1  -9.3707789 -2.8272269  3.805824  6.633050
## 8:       217209.9      29914.0  -6.4780728 -1.9021723  3.391685  5.293857
##      statistic      p.value   FDRCorrect
## 1:   -4.892989 1.008098e-02 0.0780370841
## 2:  -74.415340 1.990020e-07 0.0004074567
## 3:   -8.394682 9.808226e-03 0.0765041646
## 4:  -25.157460 2.292129e-04 0.0080224498
## 5:  -13.812490 5.016011e-03 0.0500989411
## 6:  -54.607742 3.351762e-04 0.0093499440
## 7: -263.875552 1.436125e-05 0.0024662376
## 8:  -74.924454 1.780889e-04 0.0073246647
##                                                                                                      Compound_Name
## 1:                                                                                                                
## 2:                                                                                                                
## 3:                                    Massbank:PB000419 Arginine|2-amino-5-(diaminomethylideneamino)pentanoic acid
## 4:                                                                           Spectral Match to Pro-Arg from NIST14
## 5:                                                                           Spectral Match to Val-Arg from NIST14
## 6:                                                                            Ethylenediaminetetraacetic acid EDTA
## 7:                                                                           Spectral Match to Arg-Phe from NIST14
## 8: Massbank:RP013102 Riboflavin|7,8-dimethyl-10-[(2S,3S,4R)-2,3,4,5-tetrahydroxypentyl]benzo[g]pteridine-2,4-dione
##           CF_kingdom                 CF_superclass
## 1: Organic compounds Organic acids and derivatives
## 2:        no matches                    no matches
## 3: Organic compounds Organic acids and derivatives
## 4: Organic compounds Organic acids and derivatives
## 5: Organic compounds Organic acids and derivatives
## 6: Organic compounds Organic acids and derivatives
## 7: Organic compounds Organic acids and derivatives
## 8: Organic compounds  Organoheterocyclic compounds
##                            CF_class                           CF_subclass
## 1: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 2:                       no matches                            no matches
## 3: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 4: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 5: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 6: Carboxylic acids and derivatives Tetracarboxylic acids and derivatives
## 7: Carboxylic acids and derivatives  Amino acids, peptides, and analogues
## 8:       Pteridines and derivatives        Alloxazines and isoalloxazines
##                               CF_Dparent                CF_superclass2
## 1:            N-acyl-L-alpha-amino acids Organic acids and derivatives
## 2:                            no matches                    no matches
## 3:              Arginine and derivatives Organic acids and derivatives
## 4:                            Dipeptides Organic acids and derivatives
## 5:                            Dipeptides Organic acids and derivatives
## 6: Tetracarboxylic acids and derivatives Organic acids and derivatives
## 7:                            Dipeptides Organic acids and derivatives
## 8:                               Flavins  Organoheterocyclic compounds
######## Present/absent plot
summary_data[,PresentInCtrls:=ifelse(ctrlMeanTP1To4 > 5e4, 1, 0)]
summary_data[,PresentInCtrls2:=ifelse(ctrlMeanTP1To4 > 3*BK, 1, 0)]
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(PresentInCtrls ==1)]
## 
## FALSE  TRUE 
##   427  3668
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(PresentInCtrls2 ==1 & meanValue > 3*BK)]
## 
## FALSE  TRUE 
##   728  3361
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(ctrlMeanTP1To4  > 1e5)]
## 
## FALSE  TRUE 
##   682  3413
summary_data[!is.na(p.value) & TimePoint==6 & AcConc==1 & Strain == "2243", table(ctrlMeanTP1To4  > 1e4)]
## 
## FALSE  TRUE 
##   265  3830
summary_data[,Present:=ifelse(meanValue > 3*BK, 1, 0)]
present_in_ctrls_summary_plot <- ggplot(summary_data[Present == 1 & !is.na(FDRCorrect) & Strain == "2243" & AcConc==1], 
                                        aes(x = factor(Time), fill = factor(ifelse(PresentInCtrls2==0, "Absent in controls", "Present in controls")))) + 
  geom_bar(stat = "count", position = "stack") + facet_grid(ifelse(MetName %in% c("unknown", "Unknown"),  "Unidentified", "Identified")~ifelse(IonMode == "Neg", "Negative Mode", 'Positive Mode'), scales = "free_y") + 
  scale_y_continuous(name = "Number of features", expand = c(0,0)) + theme_classic2() + xlab("Time point (hr)") + scale_fill_brewer(palette = "Set1", name = "") +
  theme(axis.text.x = element_text(size=8))

ggsave(present_in_ctrls_summary_plot, file = paste0(outdir, "presentAbsentCtrlsPlotUpdated.pdf"), width = 6, height = 3.6)

present_in_ctrls_summary_plot

############## Diff abundance plot
summary_data[,DiffAbund:=case_when(
  FDRCorrect < 0.1 & log2FC_adj > 0 ~ "Produced", 
  FDRCorrect < 0.1 & log2FC_adj < 0 ~ "Depleted",
  TRUE ~ "None"
)]
diff_abund_summary_plot <- ggplot(summary_data[!is.na(FDRCorrect) & Strain == "2243" & AcConc==1 & DiffAbund != "None"], aes(x = factor(Time), fill = factor(DiffAbund, levels = c("Produced", "Depleted", "None")))) + 
  geom_bar(stat = "count", position = "stack") + facet_grid(ifelse(MetName %in% c("unknown", "Unknown"),  "Unidentified", "Identified")~IonMode, scales = "free_y") + 
  scale_y_continuous(name = "Number of features", expand = c(0,0)) + theme_classic2() + xlab("Time point (hr)") + scale_fill_brewer(palette = "Set1", name = "") +
  theme(axis.text.x = element_text(size=8)) + scale_x_discrete(limits = summary_data[,factor(sort(unique(Time)))])

ggsave(diff_abund_summary_plot, file = paste0(outdir, "DiffAbundCtrlsPlotUpdated.pdf"), width = 5.5, height = 3.6)

diff_abund_summary_plot

diff_abund_mets <- summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" & !is.na(p.value) & abs(log2FC_adj) > 0.75 & FDRCorrect < 0.1, IDUniq]


############################ Heatmap
summary_data[AcConc==1 & Strain == "2243" ,logZscore:=scale(meanLog10Value), by = IDUniq]
mean_data_wide <- dcast(summary_data[AcConc==1 & Strain == "2243" & IDUniq %in% diff_abund_mets], IDUniq+MetName+MSI~Time, value.var = "logZscore")

row_labs <- mean_data_wide[,ifelse(MetName %in% c("Unknown", "unknown"), "", paste0(MetName, " (", MSI, ")"))]
pdf(paste0(outdir, "heatmap_timeCourse_v2.pdf"), width = 5, height = 8)
Heatmap(as.matrix(mean_data_wide[,4:ncol(mean_data_wide), with=F]), cluster_columns = F, 
        row_labels = row_labs, row_names_gp = gpar(fontsize= 8), col = c(brewer.pal(3, "Set1")[2], "white", brewer.pal(3, "Set1")[1]))
dev.off()
## png 
##   2
Heatmap(as.matrix(mean_data_wide[,4:ncol(mean_data_wide), with=F]), cluster_columns = F, 
        row_labels = row_labs, row_names_gp = gpar(fontsize= 8), col = c(brewer.pal(3, "Set1")[2], "white", brewer.pal(3, "Set1")[1]))

diff_abund_id_mets<- summary_data[TimePoint==6 & AcConc==1 & Strain == "2243" & abs(log2FC_adj) > 1 & FDRCorrect < 0.1 & !grepl("known", MetName), IDUniq]

Figure S1 various media analyses

####### Figure S1
named_mets <- data.table(MetName = summary_data[!grepl("known", MetName),sort(unique(MetName))])
named_mets[c(29, 31, 41, 48, 57, 63, 64, 68, 94, 100, 101, 103, 104, 105, 107, 109, 
             115, 118, 125, 126, 127, 131), MediaComponent:=1]  
named_mets[c(29, 31, 41, 48, 57, 63, 64, 68, 94, 100, 101, 103, 104, 105, 107, 109, 
             115, 118, 125, 126, 127, 131), Category:=c("Amino acids", "Vitamins and Cofactors", "Other", rep("Amino acids", 5),
                                                        "Vitamins and Cofactors", "Amino acids", "Other", "Amino acids",
                                                        rep("Vitamins and Cofactors",3), "Amino acids", "Vitamins and Cofactors", 
                                                        rep("Amino acids", 3), "Other", "Amino acids")]
summary_data <- merge(summary_data, named_mets, by = "MetName", all.x = T)

mean_data_wide2 <- dcast(summary_data[AcConc==1 & Strain == "2243"], IDUniq+MetName~Time, value.var = "meanLog10Value")
row_labs <- mean_data_wide2[,ifelse(MetName %in% c("Unknown", "unknown"), "", MetName)]

media_component_plot <- summary_data[MediaComponent==1 & AcConc == 1 & Strain=="2243"] %>% 
  complete(MetName, IonMode, Time) %>% ggplot(aes(y = factor(MetName, levels = rev(sort(named_mets[,MetName]))), x = factor(Time), fill = meanLog10Value)) + facet_grid(.~IonMode, scales = "free") +
  geom_tile() + scale_fill_gradient(low = "white", high = brewer.pal(8, "Purples")[8], na.value = "gray80") +
  xlab("Time (hr)") + ylab("") 

ggsave(media_component_plot, file = paste0(outdir, "media_components_metabolomics.pdf"), width=7, height = 5.5)

media_component_plot

####### compare with Sonnenburg and Bisanz datasets
## Quick look at sonnenburg data
invitro_metadata <- fread("../sonnenburg_in_vitro_metadata.txt") 
samples_interest <- invitro_metadata[grepl("Eggerthella", taxonomy)]

invitro_data <- fread("../sonnenburg_in_vitro_data.txt", fill = T)
invitro_sub <- invitro_data[V1 %in% samples_interest[,V1]]
invitro_sub <- melt(invitro_sub, id.var = "V1", variable.name = "compound")
invitro_sub <- invitro_sub[!is.na(value)]
invitro_summary <- invitro_sub[,list(median(value), mean(value), sd(value)), by=compound]
setnames(invitro_summary, c("V1", "V2", "V3"), c("medianLog2FC", "mean_log2FC", "sd_log2FC"))
invitro_summary[,Prod:=ifelse(medianLog2FC > 0.5, 1, 0)]
invitro_summary[,Deplete:=ifelse(medianLog2FC < -0.5, 1, 0)]
invitro_summary[,Outcome:=case_when(
  Prod==1 ~ "Produced",
  Deplete==1 ~ "Depleted",
  TRUE ~"Unchanged"
)]


summary_data_sub <- summary_data[Strain == "2243" & AcConc==1]
summary_data_sub[,Prod:=ifelse(FDRCorrect < 0.1 & log2FC_adj > 0.5, 1, 0)]
summary_data_sub[,Deplete:=ifelse(FDRCorrect < 0.1 & log2FC_adj < -0.5, 1, 0)]
summary_data_sub[,Outcome:=case_when(
  Prod==1 ~ "Produced",
  Deplete==1 ~ "Depleted",
  TRUE ~"Unchanged"
)]

prev_data <- fread("../../ElentaStrainMetabolomics/DiffAbundAllSummary_filtered_finalNorm.tsv")
prev_data[,Prod2:=ifelse(T_FDR_pvalue < 0.1 & log2FC > 0.5, 1, 0)]
prev_data[,Deplete2:=ifelse(T_FDR_pvalue < 0.1 & log2FC < -0.5, 1, 0)]
prev_data[,Outcome:=case_when(
  Prod2==1 ~ "Produced",
  Deplete2==1 ~ "Depleted",
  TRUE ~"Unchanged"
)]
prev_data_sub <- prev_data[SampleID == "Eggerthella_lenta_UCSF2243"]
prev_data_sub[,MetName:=DB_Compound_Name1]
prev_data_sub[is.na(MetName), MetName:="Unknown"]
invitro_summary[,MetName:=compound]

counts_all <- rbind(
  data.table(summary_data_sub[TimePoint == 6,list(IDUniq, MetName, Outcome)], Dataset = "EDM1"),
  data.table(prev_data_sub[,list(FeatureID, MetName, Outcome)], Dataset = "Bisanz et al\n(ISP-2+Arg)"),
  data.table(invitro_summary[,list(compound, Outcome)], Dataset = "Han et al\n(Mega)"), fill = T
)
counts_all[,Dataset:=factor(Dataset, levels = c(
  "Bisanz et al\n(ISP-2+Arg)",
  "Han et al\n(Mega)",
  "EDM1"
))]
# blue = HSL 7, 2, 255
# red = rgb 255, 4, 1

count1 <- ggplot(counts_all[Outcome != "Unchanged"], aes(x = Dataset, fill = Outcome)) + geom_bar(position = "dodge") + xlab("In vitro metabolomics dataset") + 
  ylab("Count") + scale_fill_manual(values = brewer.pal(3, "Set1")[c(2,1)]) + theme(axis.text = element_text(color = "black", size = 8)) +
  scale_y_continuous(expand = c(0,0))

count2 <- ggplot(counts_all[!grepl("known", MetName) & Outcome != "Unchanged"], aes(x = Dataset, fill = Outcome)) + 
  geom_bar(position = position_dodge(preserve = "single")) + xlab("In vitro metabolomics dataset")+ ylab("Number of identified metabolite features") +
  scale_fill_manual(values =  brewer.pal(3, "Set1")[c(2,1)], name = "")+ theme(axis.text = element_text(color = "black", size = 8)) + scale_y_continuous(expand = c(0,0))
compare_plot <- count1+guides(fill = "none") + count2

ggsave(compare_plot, file = paste0(outdir, "compare_studies_plot.pdf"), width = 6, height = 3)

compare_plot

############## Trajectories Figure S1E
hclust_results <- hclust(dist(as.matrix(mean_data_wide[,4:ncol(mean_data_wide), with=F])))
clusts <- cutree(hclust_results, h = 1.6)
clust_table <- data.table(IDUniq = mean_data_wide[,IDUniq], Clust = clusts)
plot_dat <- summary_data[AcConc==1 & Strain == "2243" & IDUniq %in% diff_abund_mets]
plot_dat <- merge(plot_dat, clust_table, by = "IDUniq", all.x = T)
plot_dat[,Clust2:=factor(paste0("Cluster ", Clust), levels = c(paste0("Cluster ", 1:20)))]
gnps_color_scale <- c(brewer.pal(12, "Set3")[c(1:2,4:6)], "gray40")
names(gnps_color_scale) <- c(summary_data[CF_superclass2 != "no matches", sort(unique(CF_superclass2))], "no matches")

good_clusts <- plot_dat[,list(length(unique(IDUniq)), length(unique(MetName[!grepl("known", MetName)]))), by = Clust2][V1 > 4 & V2 > 0, Clust2]
trajectory_plot <- ggplot(plot_dat[CF_superclass2 == "no matches" & Clust2 %in% good_clusts], 
                          aes(x = Time, y = logZscore, group = IDUniq, color = CF_superclass2)) + 
  geom_line(alpha = 0.5) + geom_line(data = plot_dat[CF_superclass2 != "no matches"& Clust2 %in% good_clusts]) + 
  facet_grid(.~factor(Clust2, levels = good_clusts)) + scale_color_manual(values= gnps_color_scale, name = "Chemical superclass") + 
  xlab("Time (hr)") + ylab("Scaled log peak height") #+ 

clust_tab2 <- plot_dat[MSI != "",list(IDUniq, MetName, MSI, Clust)] %>% distinct()
clust_tab2[,DisplayName:=paste0(MetName, " (", MSI, ")")]
clust_tab2[,CompRank:=rank(MSI, MetName, ties.method = "first"), by = Clust]
clust_tab2[,CompRank:=max(CompRank)-CompRank]
clust_tab2[,Clust2:=factor(paste0("Cluster ", Clust), levels = good_clusts)]
clust_labels <- ggplot(clust_tab2[Clust2 %in% good_clusts], aes(x = 1, y = CompRank, label = DisplayName)) + geom_text(size = 3) + facet_grid(~Clust2, drop = F) + theme_nothing()
trajectory_plot_all <- trajectory_plot + clust_labels + plot_layout(nrow = 2, heights = c(1, 1.3))

ggsave(trajectory_plot_all, file = paste0(outdir, "hclust_trajectories.pdf"), width = 11, height = 3.5)

trajectory_plot_all

2 Figure S2 (LOO Growth data)

rm(list = ls())
### Reanalyze all growth experiments
library(data.table)
library(tidyverse)
library(growthcurver)
library(cowplot)
library(patchwork)
library(readxl)
library(RColorBrewer)
library(ggpubr)
library(ggbeeswarm)

source("scripts/growth_curve_functions.R")
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:cowplot':
## 
##     stamp
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.8.0       ggbeeswarm_0.6.0      growthcurver_0.3.1   
##  [4] KEGGREST_1.32.0       patchwork_1.1.1       ggrepel_0.9.1        
##  [7] ComplexHeatmap_2.13.1 ggpubr_0.4.0          RColorBrewer_1.1-2   
## [10] fastcluster_1.2.3     vegan_2.6-2           lattice_0.20-45      
## [13] permute_0.9-5         webchem_1.1.2         forcats_0.5.1        
## [16] stringr_1.4.0         dplyr_1.0.7           purrr_0.3.4          
## [19] readr_2.1.2           tidyr_1.2.0           tibble_3.1.7         
## [22] tidyverse_1.3.1       readxl_1.3.1          cowplot_1.1.1        
## [25] ggplot2_3.3.6         data.table_1.14.2    
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-3       ggsignif_0.6.3         rjson_0.2.21          
##  [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.34.0        
##  [7] GlobalOptions_0.1.2    fs_1.5.0               clue_0.3-61           
## [10] rstudioapi_0.13        farver_2.1.0           fansi_1.0.3           
## [13] xml2_1.3.2             codetools_0.2-18       splines_4.1.1         
## [16] doParallel_1.0.17      knitr_1.39             jsonlite_1.8.0        
## [19] Cairo_1.6-0            broom_0.8.0            cluster_2.1.3         
## [22] dbplyr_2.1.1           png_0.1-7              data.tree_1.0.0       
## [25] compiler_4.1.1         httr_1.4.2             backports_1.2.1       
## [28] assertthat_0.2.1       Matrix_1.4-1           fastmap_1.1.0         
## [31] cli_3.3.0              htmltools_0.5.2        tools_4.1.1           
## [34] gtable_0.3.0           glue_1.6.2             GenomeInfoDbData_1.2.7
## [37] Rcpp_1.0.8.3           carData_3.0-5          cellranger_1.1.0      
## [40] jquerylib_0.1.4        vctrs_0.4.1            Biostrings_2.62.0     
## [43] nlme_3.1-159           iterators_1.0.13       xfun_0.31             
## [46] rvest_1.0.2            lifecycle_1.0.0        rstatix_0.7.0         
## [49] zlibbioc_1.40.0        MASS_7.3-57            scales_1.1.1          
## [52] hms_1.1.1              parallel_4.1.1         yaml_2.3.5            
## [55] sass_0.4.1             stringi_1.7.6          highr_0.9             
## [58] S4Vectors_0.32.4       foreach_1.5.2          BiocGenerics_0.40.0   
## [61] shape_1.4.6            GenomeInfoDb_1.30.1    rlang_1.0.2           
## [64] pkgconfig_2.0.3        matrixStats_0.62.0     bitops_1.0-7          
## [67] evaluate_0.15          labeling_0.4.2         tidyselect_1.1.2      
## [70] magrittr_2.0.3         R6_2.5.1               magick_2.7.3          
## [73] IRanges_2.28.0         generics_0.1.0         DBI_1.1.1             
## [76] pillar_1.7.0           haven_2.5.0            withr_2.5.0           
## [79] mgcv_1.8-40            abind_1.4-5            RCurl_1.98-1.7        
## [82] modelr_0.1.8           crayon_1.4.1           car_3.0-13            
## [85] utf8_1.2.2             tzdb_0.3.0             rmarkdown_2.14        
## [88] GetoptLong_1.0.5       reprex_2.0.1           digest_0.6.29         
## [91] stats4_4.1.1           munsell_0.5.0          beeswarm_0.4.0        
## [94] vipor_0.4.5            bslib_0.3.1
main_dir <- "../GrowthExperiments/"
results_tables <- data.table(read_xlsx(paste0(main_dir, "allGrowthCurveData2021-06-08.xlsx")))
## New names:
## • `` -> `...8`
results_tables <- results_tables[!is.na(ResultsFile)]
outdir <- "fig_s3/"
dir.create(outdir)

all_data <- rbindlist(lapply(1:nrow(results_tables), function(x){
  data <- read_growthcurve_file(paste0(results_tables[x, Directory], "/", results_tables[x, ResultsFile]))
  data[,Experiment:=results_tables[x, Experiment]]
  return(data)
}), fill = T)

all_layouts <- rbindlist(lapply(1:nrow(results_tables), function(x){
  if(grepl("xlsx", results_tables[x, LayoutFile])){
    if(!is.na(results_tables[x, as.numeric(LayoutSheet)])){
      layout <- data.table(read_xlsx(paste0(results_tables[x, Directory], "/", results_tables[x, LayoutFile]), 
                                     sheet = results_tables[x, as.numeric(LayoutSheet)]))
    } else {
      layout1 <- data.table(read_xlsx(paste0(results_tables[x, Directory], "/", results_tables[x, LayoutFile]), 
                                      sheet = 1))
      layout2 <- data.table(read_xlsx(paste0(results_tables[x, Directory], "/", results_tables[x, LayoutFile]), 
                                      sheet = 2))
      layout1[,Type:="Media"]
      layout2[,Type:="Strain"]
      layout <- rbind(layout1[!is.na(Row)], layout2[!is.na(Row)], fill = T)
    }
  } else {
    layout <- fread(paste0(results_tables[x, Directory], "/", results_tables[x, LayoutFile]), header = T)
  }
  if(names(layout)[1] %in% c("...1", "V1")){ setnames(layout, names(layout)[1], "Row")}
  if(!"Type" %in% names(layout)){
    if(ncol(layout) > 13) layout <- layout[,1:13, with=F]
    layout[,Type:=ifelse(is.na(Row)|Row == "", "Strain", "Media")]
    layout[,Row:=sapply(1:nrow(layout), function(x){
      if(!is.na(layout[x, Row]) & layout[x,Row] != ""){
        return(layout[x,Row])
      } else {
        return(layout[x-1, Row])
      }
    })]
  }
  layout <- melt(layout, id.var = c("Row", "Type"), variable.name = "Column")
  layout <- dcast(layout[!is.na(Row)], Row+Column~Type, value.var = "value")
  layout[,Experiment:=results_tables[x, Experiment]]
  return(layout)
}), fill = T)
## New names:
## • `` -> `...14`
## • `` -> `...15`
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...14`
## • `` -> `...15`
## AcArgUracilRflv and AcMinRflvFollowUps have errors in the strain id here

all_layouts[Experiment=="AddCarboxylicAcids" & Column %in% c(2,6,10), Strain:="c"]
## Fix inconsistent layout issues
all_layouts[grepl("_c$", Media), Strain:="c"]
all_layouts[grepl("_c$", Media), Media:=gsub("_c$", "", Media)]
all_layouts[Experiment %in% c("AminoAcidLOOs", "GMM_LAB_Components2") & grepl("c$", Media), Strain:="c"]
all_layouts[Experiment %in% c("AminoAcidLOOs", "GMM_LAB_Components2") & grepl("c$", Media), Media:=gsub("c$", "", Media)]

all_layouts[!is.na(Media) & Media != "" & is.na(Strain), Strain:="2243"]
all_layouts[Strain == "AB8", Strain:="AB8n2"]
all_layouts[Strain == "Val", Strain:="Valencia"]
all_layouts <- all_layouts[!is.na(Media) & !is.na(Strain)]
all_layouts <- all_layouts[Media != "" & Strain != ""]
aa_names <- data.table(read_xlsx(paste0(results_tables[Experiment == "AminoAcidLOOs", Directory], "/",
                                        results_tables[Experiment == "AminoAcidLOOs", LayoutFile]), 
                                 sheet = results_tables[Experiment == "AminoAcidLOOs", OtherInfoSheet]))
aa_names[,ConditionID:=as.character(ConditionID)]
all_layouts <- merge(all_layouts, aa_names, by.x = "Media", by.y = "ConditionID", all.x = T)
all_layouts[!is.na(Condition), Media:=Condition]
all_layouts[,Condition:=NULL]
all_layouts[,well:=paste0(Row, Column)]

all_layouts <- all_layouts[!(Experiment == "MineralLOOs" & well == "B8")]
all_layouts[Experiment == "AddCarboxylicAcids" & Column %in% c(2, 6, 10), Strain:="c"]
all_layouts[Experiment == "AcArgUracilRflv" & Media == "DM1_50Arg_10Ac", Media:="DM1_50Arg_0Ac"]
all_layouts[Experiment == "AcArgUracilRflv" & Media == "DM1_50Arg_1Ac", Media:="DM1_50Arg_10Ac"]
all_layouts[Experiment == "AcArgUracilRflv", ArgConc:=as.numeric(ifelse(grepl("Arg", Media), gsub("DM1_", "", gsub("Arg.*$", "", Media)), 100))]
all_layouts[Experiment == "AcArgUracilRflv", AcConc:=as.numeric(ifelse(grepl("Ac", Media), gsub("DM1_", "", gsub("[0-9]+Arg_", "", gsub("Ac.*$", "", Media))), 100))]
all_layouts[,BaseDM1Condition:=ifelse(Media %in% c("DM1", "DM1b", "DM1Ac10",  "DM1_Old", "None-base", "GL", "GLHmOld", "DM1_v2", "DM1_VitMix", "Ac1"), "DM1", "Other")]

all_data[Experiment == "AcArgUracilRflv", time:=time+25.5]
growth_data_long <- melt(all_data, id.var = c("time", "temp", "Experiment"), variable.name = "well")
growth_data_long <- merge(growth_data_long, all_layouts, by = c("well", "Experiment"), all.x = T)
growth_data_long <- growth_data_long[!is.na(Media) & !is.na(value)] #wells without metadata were empty

setnames(all_layouts, "Media", "Condition")
setnames(growth_data_long, "Media", "Condition")

growth_data_long[Strain == 'c' & time < 28 & Experiment != "GLUACvariants" & value > 0.17, value:=NA] #remove weird early readings
bad_blank_wells <- growth_data_long[Strain == "c" & (time > 28 & Experiment != "GLUACvariants"), length(value[value > 0.25]), by = list(well, Experiment)][V1 > 10]
bad_blank_wells2 <- growth_data_long[,length(value[time < 4 & value > 0.3& Experiment != "GLUACvariants"]), by=list(well, Experiment)][V1 > 1]
bad_blank_wells3 <- growth_data_long[Strain == "c", (median(value[time > (max(time)-10)], na.rm = T)-median(value[time < 10])), by = list(well, Experiment)][V1 > 0.06]
bad_blank_wells <- unique(rbind(bad_blank_wells[,list(well, Experiment)], bad_blank_wells2[,list(well, Experiment)], bad_blank_wells3[,list(well, Experiment)]))
bad_blank_wells[,BadBlank:=1]
growth_data_long <- merge(growth_data_long, bad_blank_wells[,list(Experiment, well, BadBlank)], by = c("Experiment", "well"), all.x = T)
all_layouts <- merge(all_layouts, bad_blank_wells[,list(Experiment, well, BadBlank)], by = c("Experiment", "well"), all.x = T)
blank_wells1 <- all_layouts[Strain=="c" & is.na(BadBlank), list(Experiment, well, Condition)]

### Normalize all data
corrected_data_all <- rbindlist(lapply(1:nrow(results_tables), function(j){
  exp <- results_tables[j, Experiment]
  blank_wells <- blank_wells1[Experiment == exp, well]
  names(blank_wells) <- blank_wells1[Experiment == exp, Condition]
  corrected_data <- correct_growth_data_custom(growth_data_long[Experiment == exp], blank_wells = blank_wells, 
                                               method = "blankTimeMatch", wide_form = F)
  corrected_data[,Experiment:=exp]
}), fill = T)
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
## Correcting based on provided blank wells for each condition, time-matched
problem_wells <- corrected_data_all[,sum(NewValue < 0), by=list(Experiment, well)][V1 > 0]
corrected_data_all[Experiment == "GLUACvariants",NewValue:=NewValue - min(NewValue, na.rm = T), by = list(Experiment, well)]
corrected_data_all[,NewValue:=NewValue - min(NewValue, na.rm = T), by = list(Experiment, well)]

### Reassign one sub-experiment
all_layouts[Experiment == "AgmatineCombinations" & grepl("old", Condition, ignore.case = T), Experiment:="DM1OldB12Old"]
corrected_data_all[Experiment == "AgmatineCombinations" & well %in% all_layouts[Experiment == "DM1OldB12Old", well], Experiment:="DM1OldB12Old"]

growth_data_norm <- dcast(corrected_data_all, Experiment+time~well, value.var = "NewValue", fill = NA)

experiment_list <- corrected_data_all[,sort(unique(Experiment))]
trim_after_times <- rep(52, length(experiment_list))
trim_after_times[which(experiment_list == "AcArgUracilRflv")] <- 64
trim_after_times[which(experiment_list == "AcetateDosesStrains2")] <- 44
trim_after_times[which(experiment_list == "AcetateDosesStrains")] <- 40

## Fit logistic models
model_fits <- rbindlist(lapply(1:length(experiment_list), function(x) {
  exp <- experiment_list[x]
  foo <- fit_growthcurver_models(growth_data_norm[Experiment == exp, 2:ncol(growth_data_norm), with=F], 
                                 indiv_fits = T, trim_after = trim_after_times[x]) #, nrow(growth_data_norm[Experiment == exp & time < 0.5]
  foo[,Experiment:=exp]
  return(foo)
}), fill = T)
model_fits <- merge(model_fits, all_layouts[,list(Experiment, well, Strain, Condition, BaseDM1Condition)], by.x = c("Experiment", "Sample"), by.y = c("Experiment", "well"), all.x = F)
model_fits[,Category:=gsub("[0-9]$", "", Experiment)]

## Calculate Percent of base version
base_models <- model_fits[BaseDM1Condition == "DM1"]
base_models[,BadFit:=ifelse(k > 1 | sigma > 0.1, 1, 0)]
base_model_summary <- base_models[BadFit==0 & Strain == "2243",list(1/mean(sapply(r[!is.na(r)], function(x){ 1/x})), sd(r), mean(k), sd(k), mean(N0), sd(N0), mean(t_mid), sd(t_mid), mean(t_gen), sd(t_gen), mean(auc_l), sd(auc_l), mean(auc_e), sd(auc_e)), by=list(Strain, Experiment)]
## Split "old' comparison into its own experiment

bad_base <- c("AminoAcidLOOs", "GLUACvariants", "GMM_LAB_Components2")
mean_base_good <- base_model_summary[!Experiment %in% bad_base,
                                     lapply(.SD, mean), .SDcols=paste0("V", seq(1,13,by=2))]

model_fits_compare <- merge(model_fits[Strain != "c"], base_model_summary[!Experiment %in% bad_base], by = "Experiment", all.x = T)
model_fits_compare[is.na(V1), V1:=mean_base_good[,V1]]
model_fits_compare[is.na(V3), V3:=mean_base_good[,V3]]
model_fits_compare[is.na(V5), V5:=mean_base_good[,V5]]
model_fits_compare[is.na(V7), V7:=mean_base_good[,V7]]
model_fits_compare[is.na(V9), V9:=mean_base_good[,V9]]
model_fits_compare[is.na(V11), V11:=mean_base_good[,V11]]
model_fits_compare[is.na(V13), V13:=mean_base_good[,V13]]

model_fits_compare[,FractionAUC_e:=auc_e/V13]
model_fits_compare[,FractionAUC_l:=auc_l/V11]

loo_dat <- model_fits[Experiment %in% c("AminoAcidLOOs", "MineralLOOs", "VitaminLOOs", "VitaminLOOs2", "VitaminLOOs3", "DM1OldB12Old", "AminoAcidLOORedo")| Experiment %in%
                                        c("AgmatineCombinations", "DM1OldB12Old")|(Experiment == "AcArgUracilRflv" & Condition %in% c("DM1b", "DM1bNoUracil")|(Experiment == "AddCarboxylicAcids" & Condition %in% c("Ac1", "Ac0")))]

condition_info <- unique(loo_dat[,list(Experiment, BaseDM1Condition, Condition)])
condition_info[BaseDM1Condition == "DM1", LOO:="DM1"]
condition_info[is.na(LOO),LOO:=c("Uracil", "Acetate", rep(NA, 12), "B12", "Arg", "Leu", "Lys", "Cys","His", "Ile", "Met","Trp", "Tyr",
                                 "Phe","Ser", "Thr", "Val", "Pro",NA,
                                 "Acetate",NA, NA, NA,
                       "Arg", rep(NA, 4), "His", "Cys", "Ile", "Lys", "Leu", "Met", "Ser", "Phe", "Thr", "Tyr","Trp", "Val", "Pro", "B12",
                       "Ni", "Zn", "H3BO3", "WO4", "Mg", "Mn", "Mo", "Fe", "Ca", "HCO3", "NO3", "Co", "Cu", NA, NA,NA, "Ascorbic acid", "B12", NA,
                       "NAD+", "Pyridoxamine", "Pantothenate", "Folate", "Menadione", "Thiamine","Biotin", "Riboflavin",
                       "Hemin", "p-aminobenzoate", NA, NA, "Ascorbic acid", "B12", NA, "NAD+", "Pyridoxamine", "Pantothenate", "Folate", "Menadione",
                       "Thiamine", "Biotin", "Riboflavin", "Hemin", "p-aminobenzoate", NA, NA, NA, "p-aminobenzoate", "Pantothenate", NA, "NAD+", "B12",
                       "Thiamine", NA, "Biotin", "Riboflavin", "Hemin", NA, NA, NA)]
condition_info <- condition_info[!(Experiment == "AminoAcidLOOs" & LOO == "Acetate")]
condition_info[!is.na(LOO),LOOCategory:=c("Other", "Other", "Other", "Other","Vitamins", "Vitamins", rep("Amino acids", 15),
                                          rep("Amino acids", 15), "Vitamins", "Vitamins",
                                          rep("Minerals", 14), rep("Vitamins", 35))]
condition_info[LOO=="DM1", LOOCategory:="DM1"]
condition_info[,LOOCategory:=factor(LOOCategory, levels = c("DM1", "Amino acids", "Vitamins", "Minerals", "Other"))]
levels(condition_info$LOOCategory) <- c("Full DM1", "Amino acid dropouts", "Vitamin dropouts", "Mineral dropouts", "Other dropouts")

corrected_data_all[,Time2:=round(time, digits = 1)]
corrected_data_all[,Condition2:=ifelse(Condition == "DM1", paste0(Condition, "_", Experiment), Condition)]
corrected_data_all[,Condition3:=paste0(Condition, "_", Experiment)]
average_dat <- corrected_data_all[Strain == "2243" & Experiment != "VitaminLOOs2" & (grepl("LOO", Experiment)|
                                                        Experiment %in% c("AgmatineCombinations", "DM1OldB12Old")|
                                                        (Experiment == "AcArgUracilRflv" & Condition %in% c("DM1b", "DM1bNoUracil"))|
                                                        (Experiment == "AddCarboxylicAcids" & Condition %in% c("Ac1", "Ac0"))),
                                  list(mean(NewValue), sd(NewValue)/sqrt(length(NewValue))), 
                                  by = list(Experiment,Condition2, Condition, Strain, Time2)]
average_dat_sub <- merge(average_dat, unique(condition_info[!is.na(LOO)]), by =c("Condition", "Experiment"), all.x = F)
setnames(average_dat_sub, "Time2", "time")

model_fits_compare <- merge(model_fits_compare, condition_info, by = c("Experiment", "Condition"), all.x = T)
loo_order2 <- model_fits_compare[!is.na(LOO),median(FractionAUC_l), by=LOO][order(V1), LOO]
model_fits_compare[,LOO:=factor(LOO, levels = loo_order2)] #rev(loo_colors)
model_fits_compare <- model_fits_compare[Strain.x == "2243"]
model_fits_compare[,Category2:=factor(gsub(" dropouts", "s", as.character(LOOCategory)), levels = c("Full DM1", "Amino acids", "Vitamins", "Minerals", "Others"))]

model_fits_compare[,FracTMid:=t_mid/V7]
model_fits_compare[,FracK:=k/V3]
model_fits_compare[,FracR:=r/V1]
setnames(model_fits_compare, c(paste0("V", 1:14)), paste0("base_", c("r_mean", "r_sd", "k_mean", "k_sd", "N0_mean", "N0_sd", 
                                                                     "tmid_mean", "tmid_sd", "tgen_mean", "tgen_sd", "aucL_mean", "aucL_sd", "aucE_mean", "aucE_sd")))

corrected_data_sub <- merge(corrected_data_all[Strain == "2243" & (grepl("LOO", Experiment)|
                                                                     Experiment %in% c("AgmatineCombinations", "DM1OldB12Old")|
                                                                     (Experiment == "AcArgUracilRflv" & Condition %in% c("DM1b", "DM1bNoUracil"))|
                                                                     (Experiment == "AddCarboxylicAcids" & Condition %in% c("Ac1", "Ac0")))], unique(condition_info[!is.na(LOO)]), by = c("Experiment", "Condition"), all.x = F)
loo_colors <- condition_info[!is.na(LOO)][order(LOOCategory, LOO)][,unique(LOO)]
average_dat_sub[,LOO:=factor(LOO, levels = loo_colors)]
average_dat_sub[,Group2:=ifelse(LOO != "DM1", as.character(LOOCategory), Experiment)]
average_dat_sub[LOO == "DM1" & (grepl("Vitamin", Experiment)|grepl("B12", Experiment)|grepl("Agma", Experiment)), Group2:="Vitamin dropouts"]
average_dat_sub[LOO == "DM1" & (grepl("Mineral", Experiment)) , Group2:="Mineral dropouts"]
average_dat_sub[LOO == "DM1" & (grepl("Amino", Experiment)) , Group2:="Amino acid dropouts"]
average_dat_sub[LOO == "DM1" &  (grepl("Ac", Experiment)|grepl("Urac", Experiment)) , Group2:="Other dropouts"]
average_dat_sub <- average_dat_sub[!(Experiment == "AminoAcidLOOs" & LOO=="DM1" & Group2=="Other dropouts")]
average_dat_sub[,Group2:=factor(Group2, levels = c("Amino acid dropouts", "Vitamin dropouts", "Mineral dropouts", "Other dropouts"))]


### Plots
## Get summary parameters
category_colors <- c(brewer.pal(8, "Dark2")[c(1,4,5, 3)])
category_colors <- c(brewer.pal(8, "Set3")[c(4,5,6,3)])
names(category_colors) <- c("Amino acids", "Vitamins", "Minerals", "Others")
category_colors2 <- category_colors
names(category_colors2)[1] <- "Amino acids &\nother carbon sources"

model_fits_compare_good2 <- model_fits_compare[!is.na(LOO) & LOOCategory != "Full DM1" & 
                                                 Experiment != "VitaminLOOs2" & Strain.x=="2243"]
model_fits_compare_good2 <- model_fits_compare_good2[(sigma < 0.15|r_se > 10) & base_k_mean < 1] ## Bad fits, don't lose any LOO data
fwrite(model_fits_compare_good2, file = paste0(outdir, "growth_curve_resultsAllSummaryParams_LOO.txt"), sep = "\t")

model_fits_compare_good2b <- melt(model_fits_compare_good2, id.var = c("LOOCategory", "LOO", "Sample", "Condition", "Experiment"), 
                                  measure.vars = c("auc_e", "t_mid", "k", "r"))
model_fits_compare_good2b[,LOOCategory2:=gsub(" dropouts", "s", as.character(LOOCategory))]
median_fracs2b <- model_fits_compare_good2b[Experiment !="VitaminLOOs2" & value < 100,median(value),
                                            by = list(LOOCategory, LOOCategory2, LOO, variable)]
median_fracs2b[, LOOCategory3:=ifelse(LOOCategory2 %in% c("Others", "Amino acids"), "Amino acids &\nother carbon sources", LOOCategory2)]
median_fracs2b[,LOOCategory3:=factor(LOOCategory3, levels = names(category_colors2[1:3]))]
base_means <- unique(model_fits_compare_good2[,list(Experiment, base_aucE_mean, base_tmid_mean, base_k_mean, base_r_mean)])
base_means <- data.table(variable = c("auc_e", "t_mid", "k", "r"), baseVal = c(median(base_means$base_aucE_mean), median(base_means$base_tmid_mean), median(base_means$base_k_mean), median(base_means$base_r_mean)))
parameter_hists2 <- ggplot(median_fracs2b, aes(x = V1, fill = LOOCategory3))+ 
  theme(strip.background = element_blank(), panel.border = element_rect(color = "black", fill = NA)) +
  geom_vline(data = base_means, aes(xintercept = baseVal), linetype = 2) + geom_histogram(bins = 15) + 
  facet_grid(LOOCategory3 ~ variable, scales = "free") +
  scale_fill_manual(values = category_colors2[1:3], name = "", guide = "none") + xlab("") + ylab("Number of compounds")

ggsave(parameter_hists2, file = paste0(outdir, "growth_parameter_histograms_clas_absVal.pdf"), width = 6.3, height = 4.5)

parameter_hists2

### Diff abund tests

base_models_sub <- base_models[BadFit==0 & Strain == "2243" & Experiment %in% model_fits_compare_good2b[,Experiment]]
unique_loos <- model_fits_compare_good2[Experiment != "VitaminLOOs2",unique(LOO)]
unique_loo_r_pvals <- sapply(unique_loos, function(x){
  exp1 <- model_fits_compare_good2[LOO==x]
  base_exp <- base_models_sub[Experiment %in% exp1$Experiment]
  foo <- wilcox.test(exp1$r, base_exp$r)$p.value
  return(foo)
})
unique_loo_k_pvals <- sapply(unique_loos, function(x){
  exp1 <- model_fits_compare_good2[LOO==x]
  base_exp <- base_models_sub[Experiment %in% exp1$Experiment]
  foo <- wilcox.test(exp1$k, base_exp$k)$p.value
  return(foo)
})
unique_loo_tmid_pvals <- sapply(unique_loos, function(x){
  exp1 <- model_fits_compare_good2[LOO==x]
  base_exp <- base_models_sub[Experiment %in% exp1$Experiment]
  foo <- wilcox.test(exp1$t_mid, base_exp$t_mid)$p.value
  return(foo)
})
unique_loo_auc_pvals <- sapply(unique_loos, function(x){
  exp1 <- model_fits_compare_good2[LOO==x]
  base_exp <- base_models_sub[Experiment %in% exp1$Experiment]
  foo <- wilcox.test(exp1$auc_e, base_exp$auc_e)$p.value
  return(foo)
})
diff_results <- data.table(LOO = unique_loos, r = unique_loo_r_pvals, k = unique_loo_k_pvals, tmid = unique_loo_tmid_pvals, auc = unique_loo_auc_pvals)
diff_results[,r_padj:=p.adjust(r, method = "BH")]
diff_results[,k_padj:=p.adjust(k, method = "BH")]
diff_results[,tmid_padj:=p.adjust(tmid, method = "BH")]
diff_results[,auc_padj:=p.adjust(auc, method = "BH")]
diff_results[,table(r_padj < 0.1|k_padj < 0.1|tmid_padj < 0.1|auc_padj < 0.1)]
## 
## FALSE  TRUE 
##    24    17
fwrite(diff_results, file = paste0(outdir, "loo_diffAbundWilcoxonGrowthParamsVsBase.csv"))

#Summary by compound
median_fracs <- model_fits_compare_good2b[Experiment !="VitaminLOOs2",median(value), by = list(LOOCategory, LOOCategory2, LOO, variable)]
median_fracs[, LOOCategory3:=ifelse(LOOCategory2 %in% c("Others", "Amino acids"), "Amino acids &\nother carbon sources", LOOCategory2)]
median_fracs[,LOOCategory3:=factor(LOOCategory3, levels = names(category_colors2[1:3]))]
median_fracs[,log2Effect:=log2(V1)]
median_fracs[log2Effect > 0.5 | log2Effect < -1, unique(LOO)]
##  [1] Uracil          Acetate         B12             Arg            
##  [5] His             Ile             Leu             Lys            
##  [9] Met             Phe             Pro             Ser            
## [13] Thr             Trp             Tyr             Val            
## [17] Ca              Co              Cu              Fe             
## [21] H3BO3           HCO3            Mg              Mn             
## [25] Mo              NO3             Ni              WO4            
## [29] Zn              Ascorbic acid   Folate          Hemin          
## [33] Menadione       NAD+            p-aminobenzoate Pantothenate   
## [37] Pyridoxamine    Riboflavin      Thiamine        Cys            
## [41] Biotin         
## 42 Levels: Mg Trp Arg Cys Riboflavin Biotin Met Acetate NAD+ Menadione ... WO4
median_fracs_wide <- dcast(median_fracs, LOOCategory+LOOCategory2+LOO~variable, value.var = "V1")

comp_subset <- unique(model_fits_compare_good2b[, list(LOO, Experiment)])
comp_subset <- comp_subset[!Experiment %in% c("AminoAcidLOOs", "VitaminLOOs2")]
comp_subset <- comp_subset[!(Experiment == "VitaminLOOs" & LOO %in% comp_subset[Experiment != "VitaminLOOs", unique(LOO)])]

model_summary2 <- model_fits_compare_good2[!is.na(LOO),list(length(unique(Experiment)), length(Sample), 1/mean(sapply(r[!is.na(r) & r != 0], function(x){ 1/x})), sd(r), mean(k), sd(k), mean(N0), sd(N0),
                                           mean(sigma), sd(sigma), mean(auc_l), sd(auc_l), mean(auc_e), sd(auc_e)), by = LOO]
setnames(model_summary2, c("LOO", "Number of Experiments", "Total number of replicates", "r_mean", "r_sd", "k_mean", "k_sd", "N0_mean", "N0_sd",
                           "sigma_mean", "sigma_sd", "auc_l_mean", "auc_l_sd", "auc_e_mean", "auc_e_sd"))  
base_summary <- data.table(LOO = "EDM1", base_models_sub[,list(length(unique(Experiment)), length(Sample), 1/mean(sapply(r[!is.na(r) & r != 0], function(x){ 1/x})), sd(r), mean(k), sd(k), mean(N0), sd(N0),
                                     mean(sigma), sd(sigma), mean(auc_l), sd(auc_l), mean(auc_e), sd(auc_e))])
setnames(base_summary, c("LOO", "Number of Experiments", "Total number of replicates", "r_mean", "r_sd", "k_mean", "k_sd", "N0_mean", "N0_sd",
                           "sigma_mean", "sigma_sd", "auc_l_mean", "auc_l_sd", "auc_e_mean", "auc_e_sd"))  

model_summary2 <- rbind(base_summary, model_summary2, fill = T)
fwrite(model_summary2, file = paste0(outdir, "suppTable_loo_growth_summary.tsv"), quote=F, sep= "\t")

comp_subset2 <- sort(unique(comp_subset[,LOO]))
average_dat_sub2 <- merge(average_dat_sub, comp_subset, by = c("LOO", "Experiment"), all.x = F)
average_dat_sub2 <- rbind(average_dat_sub2, average_dat_sub[LOO=="DM1" & Experiment %in% average_dat_sub2[,unique(Experiment)]], fill = T)

## Split it up for each compound

average_dat3 <- data.table()
for(j in 1:nrow(comp_subset)){
  dat_sub <- average_dat_sub2[Experiment == comp_subset[j, Experiment] & (LOO=="DM1" | LOO == comp_subset[j, LOO])]
  dat_sub[,LOOGroup:=comp_subset[j, LOO]]
  average_dat3 <- rbind(average_dat3, dat_sub)
}
average_dat3 <- merge(average_dat3, unique(model_fits_compare_good2b[,list(LOO, LOOCategory2)]), by.x = "LOOGroup", by.y = "LOO", all.x = T)

average_dat3[,ColorCol:=ifelse(LOO=="DM1", "EDM1", as.character(LOOCategory2))]
color_set <- c(category_colors, "grey50")
names(color_set)[5] <- "EDM1"
average_dat3[,LOOCategory2:=factor(LOOCategory2, levels = c("Amino acids", "Vitamins", "Minerals", "Others"))]
effect_ranking <- median_fracs_wide[order(auc_e), LOO]
average_dat3 <- average_dat3[!(LOOGroup=="B12" & Experiment %in% c("DM1OldB12Old", "VitaminLOOs3"))]
                                           
average_dat3[,OrderedLOO:=factor(LOOGroup, levels = effect_ranking)]
comp_order <- unique(average_dat3[order(LOOCategory2), LOOGroup])
facet_effect_plot <- ggplot(average_dat3[Condition2 != "DM1_v2"], aes(x = time, ymin = V1-V2, ymax = V1+V2, fill = ColorCol, group = Condition2)) + geom_ribbon(alpha = 0.4) + geom_line(aes(y = V1, color = ColorCol)) +
  ylab("E. lenta DSM 2243 abundance (normalized OD600)") + xlab("Time (hr)") + facet_wrap(~factor(LOOGroup, levels = comp_order), ncol = 7) + theme_bw() + scale_color_manual(values = color_set, name = "Removed compound class") + scale_fill_manual(values = color_set, name = "Removed compound class") + theme(strip.background= element_blank(), panel.grid = element_blank()) + xlim(0, 78)

ggsave(facet_effect_plot, file = paste0(outdir, "allGrowthCurveLOOs.pdf"), width = 7.3, height = 6)

fwrite(average_dat3, file = paste0(outdir, "finalAvgLOOgrowthdata.csv"))

facet_effect_plot

all_growth_data <- corrected_data_all[(Strain %in% c("2243", "c") & (Experiment != "VitaminLOOs2" & (grepl("LOO", Experiment))|
                                                                                       Experiment %in% c("DM1OldB12Old")|
                                                                                       (Experiment == "AcArgUracilRflv" & Condition %in% c("DM1b", "DM1bNoUracil")|
                                                                                       Experiment == "AddCarboxylicAcids")) |(Experiment == "AgmArgCitrl" & !grepl("Base2", Condition2)))|Experiment == "PantothenateStrains"|Experiment=="AcetateDosesStrains"]
fwrite(all_growth_data, file = paste0(outdir, "growthDataPaperAll.txt"), sep = "\t")

3 Figure 2 (unlabeled data)

rm(list = ls())

library(data.table)
library(ggplot2)
library(cowplot)
library(readxl)
library(tidyverse)
library(webchem)
library(vegan)
library(RColorBrewer)
library(ggpubr)
library(ComplexHeatmap)
library(santaR)
## 
## This is santaR version 1.2.3
library(ggrepel)

theme_set(theme_classic2())
datadir <- ""
outdir <- "figure2/"
dir.create(outdir) # Gives a warning if it already exists

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] santaR_1.2.3          lubridate_1.8.0       ggbeeswarm_0.6.0     
##  [4] growthcurver_0.3.1    KEGGREST_1.32.0       patchwork_1.1.1      
##  [7] ggrepel_0.9.1         ComplexHeatmap_2.13.1 ggpubr_0.4.0         
## [10] RColorBrewer_1.1-2    fastcluster_1.2.3     vegan_2.6-2          
## [13] lattice_0.20-45       permute_0.9-5         webchem_1.1.2        
## [16] forcats_0.5.1         stringr_1.4.0         dplyr_1.0.7          
## [19] purrr_0.3.4           readr_2.1.2           tidyr_1.2.0          
## [22] tibble_3.1.7          tidyverse_1.3.1       readxl_1.3.1         
## [25] cowplot_1.1.1         ggplot2_3.3.6         data.table_1.14.2    
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3       ggsignif_0.6.3         rjson_0.2.21          
##   [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.34.0        
##   [7] GlobalOptions_0.1.2    fs_1.5.0               clue_0.3-61           
##  [10] rstudioapi_0.13        farver_2.1.0           fansi_1.0.3           
##  [13] xml2_1.3.2             codetools_0.2-18       splines_4.1.1         
##  [16] doParallel_1.0.17      knitr_1.39             jsonlite_1.8.0        
##  [19] Cairo_1.6-0            broom_0.8.0            cluster_2.1.3         
##  [22] dbplyr_2.1.1           png_0.1-7              shiny_1.6.0           
##  [25] data.tree_1.0.0        compiler_4.1.1         httr_1.4.2            
##  [28] backports_1.2.1        assertthat_0.2.1       Matrix_1.4-1          
##  [31] fastmap_1.1.0          cli_3.3.0              later_1.3.0           
##  [34] htmltools_0.5.2        tools_4.1.1            gtable_0.3.0          
##  [37] glue_1.6.2             GenomeInfoDbData_1.2.7 Rcpp_1.0.8.3          
##  [40] carData_3.0-5          cellranger_1.1.0       jquerylib_0.1.4       
##  [43] vctrs_0.4.1            Biostrings_2.62.0      nlme_3.1-159          
##  [46] iterators_1.0.13       xfun_0.31              rvest_1.0.2           
##  [49] mime_0.12              lifecycle_1.0.0        rstatix_0.7.0         
##  [52] zlibbioc_1.40.0        MASS_7.3-57            scales_1.1.1          
##  [55] promises_1.2.0.1       hms_1.1.1              parallel_4.1.1        
##  [58] yaml_2.3.5             sass_0.4.1             stringi_1.7.6         
##  [61] highr_0.9              S4Vectors_0.32.4       foreach_1.5.2         
##  [64] BiocGenerics_0.40.0    shape_1.4.6            GenomeInfoDb_1.30.1   
##  [67] rlang_1.0.2            pkgconfig_2.0.3        matrixStats_0.62.0    
##  [70] bitops_1.0-7           evaluate_0.15          labeling_0.4.2        
##  [73] tidyselect_1.1.2       magrittr_2.0.3         R6_2.5.1              
##  [76] magick_2.7.3           IRanges_2.28.0         generics_0.1.0        
##  [79] DBI_1.1.1              pillar_1.7.0           haven_2.5.0           
##  [82] withr_2.5.0            mgcv_1.8-40            abind_1.4-5           
##  [85] RCurl_1.98-1.7         modelr_0.1.8           crayon_1.4.1          
##  [88] car_3.0-13             utf8_1.2.2             tzdb_0.3.0            
##  [91] rmarkdown_2.14         GetoptLong_1.0.5       minpack.lm_1.2-1      
##  [94] reprex_2.0.1           digest_0.6.29          xtable_1.8-4          
##  [97] httpuv_1.6.5           stats4_4.1.1           munsell_0.5.0         
## [100] beeswarm_0.4.0         vipor_0.4.5            bslib_0.3.1
processed_data <- fread("processedDatasets/timecourse_all_data.csv")
summary_data <- fread("processedDatasets/timecourse_mean_summaries.csv")
growth_file = "../TimeCourse_take2_2020-8-17/ODdata.xlsx"
growth_data = read_xlsx(growth_file, sheet = 1)
time_pts = read_xlsx(growth_file, sheet = 2)
time_pts = data.table(time_pts %>% mutate(TimePoint = as.numeric(gsub("TP", "", TimePoint))))
growth_data = data.table(growth_data) %>% melt(id.var = c("Strain", "AcConc", "Replicate"))
growth_data[,Time:=as.numeric(gsub(".*_", "", variable))]
growth2 = growth_data
setnames(growth2, "value", "OD600")
growth2[Time==41.5, Time:=42]
growth2[Time==26.5, Time:=26]
growth2[Time==30.5, Time:=30]
mean_growth2 = growth2[, .(meanOD =mean(OD600), sdOD = sd(OD600)), by=list(Strain, AcConc, Time)]
mean_data2 = processed_data[Sample != "c_10_TP1_1_R1",list(meanValue = mean(value), SD_Value = sd(value)), by=list(Time, TimePoint, AcConc, Strain, IonMode, IDUniq, AlignmentID, MetName, MSI)]
mean_data2[,Strain:=factor(Strain, levels = c("2243", "AB8", "Val", "Ctrl"))]

## Which mets are significantly diff with acetate 0 vs 1?
santaR_metadata <- unique(processed_data[Strain == "2243" & AcConc %in% c(0,1), list(Sample, Time, AcConc)])[order(Sample)]
santaR_metadata[,TubeSample:=gsub("_TP[0-9]", "", Sample)]
santaR_metadata <- column_to_rownames(santaR_metadata, "Sample")

santaR_data <- column_to_rownames(dcast(processed_data[Strain == "2243" & AcConc %in% c(0,1)], Sample~IDUniq, value.var = "log10value"), "Sample")
inData <- data.frame(santaR_data)
inMeta <- data.frame(santaR_metadata)
save(inMeta, inData, file = paste0(outdir, "timeCourse2SantaRFormat_Acetate0vs1.Rdata"))
test1 <- santaR_auto_fit(inputData = inData, ind = inMeta$TubeSample, time = inMeta$Time, 
                         group = inMeta$AcConc, pval.dist = T, CBand = F, df = 3, ncores = 24)
## Input data generated: 6.95 secs
## Spline fitted: 55.95 secs
## p-val dist done: 9.43 mins
## total time: 10.48 mins
save(test1, file = paste0(outdir, "santaR_acetate0v1results.rda"))
all_pvals <- sapply(test1, function(x){ return(x$general$pval.dist)})
hist(all_pvals, breaks = 50)

santar_results <- data.table(IDUniq = gsub("^X", "", names(test1)), PVal = all_pvals)
santar_summary <- santaR_auto_summary(test1)
## p-value dist found
## Benjamini-Hochberg corrected p-value
santar_summary_ac0v1 <- data.table(rownames_to_column(santar_summary$pval.all, "IDUniq"))
santar_summary_ac0v1[,IDUniq:=gsub("^X", "", IDUniq)]
setnames(santar_summary_ac0v1, names(santar_summary_ac0v1)[2:6], paste0("Ac0v1_", names(santar_summary_ac0v1)[2:6]))
variable_features3 <- santar_summary_ac0v1[Ac0v1_dist_BH < 0.25, IDUniq]

#Must also be signif diff from ctrls at final time point in at least 1 group
variable_features3 <- variable_features3[variable_features3 %in% summary_data[FDRCorrect < 0.2 & !is.na(FDRCorrect) & TimePoint==6 & Strain == 2243, unique(IDUniq)]]

processed_data[,ZScoreLog:=scale(log10value), by=list(IDUniq, Strain)]

mean_dat_plot <- processed_data[Strain %in% c("2243", "AB8", "Val") & AcConc %in% c(0,1, 10) & IDUniq %in% variable_features3 & Duplicate == 0, 
                                mean(ZScoreLog), by = list(Strain, AcConc, IDUniq,  MetName, MSI,Time, CF_superclass)]


mean_dat_plot[,DuplicateID:=length(unique(IDUniq)), by=MetName]

feat_list <- mean_dat_plot[!MetName %in% c("Unknown", "unknown") & Strain == 2243 & !(DuplicateID==2 & grepl("Pos", IDUniq)), unique(IDUniq)]
variable_feat_order_sub <- variable_features3[variable_features3 %in% feat_list]
label_names <- sapply(variable_feat_order_sub, function(x){ return(mean_dat_plot[IDUniq==x, MetName][1])})

mean_dat_plot_wide <- dcast(mean_dat_plot[IDUniq %in% variable_feat_order_sub & Strain == 2243], IDUniq~AcConc+Time, value.var = "V1")
label_names <- sapply(mean_dat_plot_wide[,IDUniq], function(x){ return(mean_dat_plot[IDUniq==x, paste0(MetName, " (", MSI, ")")][1])})

Heatmap(mean_dat_plot_wide[,2:ncol(mean_dat_plot_wide)], cluster_columns = F, col = c(brewer.pal(3, "Set1")[c(2,1)], "white")[c(1,3,2)], 
        row_labels = label_names, row_names_gp = gpar(fontsize = 7), column_split = factor(c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6)), levels = c("0mM", "1mM", "10mM")), 
        cluster_column_slices = F, column_labels = c(rep(c(sort(unique(mean_dat_plot[,Time]))), 3)), 
        column_names_rot = 0, column_names_gp = gpar(fontsize = 8))

pdf(paste0(outdir, "acetate_heatmap_vX.pdf"), width = 7, height = 5.6)
Heatmap(mean_dat_plot_wide[,2:ncol(mean_dat_plot_wide)], cluster_columns = F, col = c(brewer.pal(3, "Set1")[c(2,1)], "white")[c(1,3,2)], 
        row_labels = label_names, row_names_gp = gpar(fontsize = 7), column_split = factor(c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6)), levels = c("0mM", "1mM", "10mM")), 
        cluster_column_slices = F, column_labels = c(rep(c(sort(unique(mean_dat_plot[,Time]))), 3)), 
        column_names_rot = 0, column_names_gp = gpar(fontsize = 8))
dev.off()
## png 
##   2
############ Growth of all 3 strains
mean_growth <- growth2[,list(mean(OD600), sd(OD600)/sqrt(length(OD600))), by = list(Time, AcConc, Strain)]
growth_all <- ggplot(mean_growth[AcConc != "0Add10"], aes(x = Time, y = V1, color = AcConc)) + geom_line() + geom_errorbar(width = 0, aes(ymin = V1-V2, ymax = V1+V2)) + geom_point() + facet_wrap(~Strain, nrow = 3) + scale_color_manual(values = brewer.pal(5, "Blues")[3:5]) + ylab("OD600") + xlab("Time (hr)")

ggsave(growth_all, file = paste0(outdir, "acetateExperimentGrowth.pdf"), width = 3.5, height = 6)

growth_all

############## AB8 and Valencia SantaR
## Which mets are significantly diff with acetate 0 vs 1 for each of these strains?
santaR_metadata <- unique(processed_data[Strain == "AB8" & AcConc %in% c(0,1), list(Sample, Time, AcConc)])[order(Sample)]
santaR_metadata[,TubeSample:=gsub("_TP[0-9]", "", Sample)]
santaR_metadata <- column_to_rownames(santaR_metadata, "Sample")

santaR_data <- column_to_rownames(dcast(processed_data[Strain == "AB8" & AcConc %in% c(0,1)], Sample~IDUniq, value.var = "log10value"), "Sample")
inData <- data.frame(santaR_data)
inMeta <- data.frame(santaR_metadata)
save(inMeta, inData, file = paste0(outdir, "timeCourse2SantaRFormat_AB8_Acetate0vs1.Rdata"))
test1 <- santaR_auto_fit(inputData = inData, ind = inMeta$TubeSample, time = inMeta$Time, 
                         group = inMeta$AcConc, pval.dist = T, CBand = F, df = 3, ncores = 24)
## Input data generated: 7.56 secs
## Spline fitted: 1.01 mins
## p-val dist done: 9.46 mins
## total time: 10.6 mins
save(test1, file = paste0(outdir, "santaR_acetate0v1results_AB8.rda"))
all_pvals <- sapply(test1, function(x){ return(x$general$pval.dist)})
hist(all_pvals, breaks = 50)

santar_results <- data.table(IDUniq = gsub("^X", "", names(test1)), PVal = all_pvals)
santar_summary <- santaR_auto_summary(test1)
## p-value dist found
## Benjamini-Hochberg corrected p-value
santar_summary_ac0v1 <- data.table(rownames_to_column(santar_summary$pval.all, "IDUniq"))
santar_summary_ac0v1[,IDUniq:=gsub("^X", "", IDUniq)]
setnames(santar_summary_ac0v1, names(santar_summary_ac0v1)[2:6], paste0("Ac0v1_", names(santar_summary_ac0v1)[2:6]))
variable_features_ab8 <- santar_summary_ac0v1[Ac0v1_dist_BH < 0.25, IDUniq]

###### Valencia
santaR_metadata <- unique(processed_data[Strain == "Val" & AcConc %in% c(0,1), list(Sample, Time, AcConc)])[order(Sample)]
santaR_metadata[,TubeSample:=gsub("_TP[0-9]", "", Sample)]
santaR_metadata <- column_to_rownames(santaR_metadata, "Sample")

santaR_data <- column_to_rownames(dcast(processed_data[Strain == "Val" & AcConc %in% c(0,1)], Sample~IDUniq, value.var = "log10value"), "Sample")
inData <- data.frame(santaR_data)
inMeta <- data.frame(santaR_metadata)
save(inMeta, inData, file = paste0(outdir, "timeCourse2SantaRFormat_Val_Acetate0vs1.Rdata"))
test1 <- santaR_auto_fit(inputData = inData, ind = inMeta$TubeSample, time = inMeta$Time, 
                         group = inMeta$AcConc, pval.dist = T, CBand = F, df = 3, ncores = 24)
## Input data generated: 9.39 secs
## Spline fitted: 56.62 secs
## p-val dist done: 9.51 mins
## total time: 10.61 mins
save(test1, file = paste0(outdir, "santaR_acetate0v1results_Val.rda"))
all_pvals <- sapply(test1, function(x){ return(x$general$pval.dist)})
hist(all_pvals, breaks = 50)

santar_results <- data.table(IDUniq = gsub("^X", "", names(test1)), PVal = all_pvals)
santar_summary <- santaR_auto_summary(test1)
## p-value dist found
## Benjamini-Hochberg corrected p-value
santar_summary_ac0v1 <- data.table(rownames_to_column(santar_summary$pval.all, "IDUniq"))
santar_summary_ac0v1[,IDUniq:=gsub("^X", "", IDUniq)]
setnames(santar_summary_ac0v1, names(santar_summary_ac0v1)[2:6], paste0("Ac0v1_", names(santar_summary_ac0v1)[2:6]))
variable_features_val <- santar_summary_ac0v1[Ac0v1_dist_BH < 0.25, IDUniq]


############################ Heatmap all strains (Figure S3)
variable_feat_all <- sort(unique(c(variable_features3, variable_features_ab8, variable_features_val)))

mean_dat_plot <- processed_data[Strain %in% c("2243", "AB8", "Val") & AcConc %in% c(0,1, 10) & IDUniq %in% variable_feat_all & Duplicate == 0, 
                                mean(ZScoreLog), by = list(Strain, AcConc, IDUniq,  MetName, MSI,Time, CF_superclass)]
mean_dat_plot[,DuplicateID:=length(unique(IDUniq)), by=MetName]

feat_list <- mean_dat_plot[!MetName %in% c("Unknown", "unknown") & Strain == 2243 & !(DuplicateID==2 & grepl("Pos", IDUniq)), unique(IDUniq)]
variable_feat_order_sub <- variable_feat_all[variable_feat_all %in% feat_list]

label_names <- sapply(variable_feat_order_sub, function(x){ return(mean_dat_plot[IDUniq==x, MetName][1])})

mean_dat_plot_wide <- dcast(mean_dat_plot[IDUniq %in% variable_feat_order_sub], IDUniq~Strain+AcConc+Time, value.var = "V1")
label_names <- sapply(mean_dat_plot_wide[,IDUniq], function(x){ return(mean_dat_plot[IDUniq==x, paste0(MetName, " (", MSI, ")")][1])})

Heatmap(mean_dat_plot_wide[,20:ncol(mean_dat_plot_wide)], cluster_columns = F, col = c(brewer.pal(3, "Set1")[c(2,1)], "white")[c(1,3,2)], 
        row_labels = label_names, row_names_gp = gpar(fontsize = 7), 
        column_split = factor(c( 
                                paste0("AB8n2 ", c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6))),
                                paste0("Valencia ", c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6)))), 
                              levels = c("AB8n2 0mM", "AB8n2 1mM",
                                         "AB8n2 10mM", "Valencia 0mM", "Valencia 1mM", "Valencia 10mM")), 
        cluster_column_slices = F, column_labels = c(rep(c(sort(unique(mean_dat_plot[,Time]))), 6)), 
        column_names_rot = 0, column_names_gp = gpar(fontsize = 7), column_title_gp = gpar(fontsize = 10))

pdf(paste0(outdir, "acetate_heatmap_vX_OtherStrains.pdf"), width = 8, height = 5.8)
Heatmap(mean_dat_plot_wide[,20:ncol(mean_dat_plot_wide)], cluster_columns = F, col = c(brewer.pal(3, "Set1")[c(2,1)], "white")[c(1,3,2)], 
        row_labels = label_names, row_names_gp = gpar(fontsize = 7), 
        column_split = factor(c( 
                                paste0("AB8n2 ", c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6))),
                                paste0("Valencia ", c(rep("0mM", 6), rep("1mM", 6), rep("10mM", 6)))), 
                              levels = c("AB8n2 0mM", "AB8n2 1mM",
                                         "AB8n2 10mM", "Valencia 0mM", "Valencia 1mM", "Valencia 10mM")), 
        cluster_column_slices = F, column_labels = c(rep(c(sort(unique(mean_dat_plot[,Time]))), 6)), 
        column_names_rot = 0, column_names_gp = gpar(fontsize = 7), column_title_gp = gpar(fontsize = 10))
dev.off()
## png 
##   2

4 Figure 2 - acetate SIRM experiment, intracellular data

rm(list = ls())

library(data.table)
library(tidyverse)
library(readxl)
library(paletteer)
library(RColorBrewer)
library(ggpubr)
theme_set(theme_classic2())

################ Intracellular
datadir <- "../SILAcetateTimeCourse/"
outdir <- "figure2/"
dir.create(outdir)

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] paletteer_1.4.0       santaR_1.2.3          lubridate_1.8.0      
##  [4] ggbeeswarm_0.6.0      growthcurver_0.3.1    KEGGREST_1.32.0      
##  [7] patchwork_1.1.1       ggrepel_0.9.1         ComplexHeatmap_2.13.1
## [10] ggpubr_0.4.0          RColorBrewer_1.1-2    fastcluster_1.2.3    
## [13] vegan_2.6-2           lattice_0.20-45       permute_0.9-5        
## [16] webchem_1.1.2         forcats_0.5.1         stringr_1.4.0        
## [19] dplyr_1.0.7           purrr_0.3.4           readr_2.1.2          
## [22] tidyr_1.2.0           tibble_3.1.7          tidyverse_1.3.1      
## [25] readxl_1.3.1          cowplot_1.1.1         ggplot2_3.3.6        
## [28] data.table_1.14.2    
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3       ggsignif_0.6.3         rjson_0.2.21          
##   [4] ellipsis_0.3.2         circlize_0.4.15        XVector_0.34.0        
##   [7] GlobalOptions_0.1.2    fs_1.5.0               clue_0.3-61           
##  [10] rstudioapi_0.13        farver_2.1.0           fansi_1.0.3           
##  [13] xml2_1.3.2             codetools_0.2-18       splines_4.1.1         
##  [16] doParallel_1.0.17      knitr_1.39             jsonlite_1.8.0        
##  [19] Cairo_1.6-0            broom_0.8.0            cluster_2.1.3         
##  [22] dbplyr_2.1.1           png_0.1-7              shiny_1.6.0           
##  [25] data.tree_1.0.0        compiler_4.1.1         httr_1.4.2            
##  [28] backports_1.2.1        assertthat_0.2.1       Matrix_1.4-1          
##  [31] fastmap_1.1.0          cli_3.3.0              later_1.3.0           
##  [34] htmltools_0.5.2        tools_4.1.1            gtable_0.3.0          
##  [37] glue_1.6.2             GenomeInfoDbData_1.2.7 Rcpp_1.0.8.3          
##  [40] carData_3.0-5          cellranger_1.1.0       jquerylib_0.1.4       
##  [43] vctrs_0.4.1            Biostrings_2.62.0      nlme_3.1-159          
##  [46] iterators_1.0.13       xfun_0.31              rvest_1.0.2           
##  [49] mime_0.12              lifecycle_1.0.0        rstatix_0.7.0         
##  [52] zlibbioc_1.40.0        MASS_7.3-57            scales_1.1.1          
##  [55] promises_1.2.0.1       hms_1.1.1              parallel_4.1.1        
##  [58] rematch2_2.1.2         yaml_2.3.5             sass_0.4.1            
##  [61] stringi_1.7.6          highr_0.9              S4Vectors_0.32.4      
##  [64] foreach_1.5.2          BiocGenerics_0.40.0    shape_1.4.6           
##  [67] GenomeInfoDb_1.30.1    rlang_1.0.2            pkgconfig_2.0.3       
##  [70] matrixStats_0.62.0     bitops_1.0-7           evaluate_0.15         
##  [73] labeling_0.4.2         tidyselect_1.1.2       magrittr_2.0.3        
##  [76] R6_2.5.1               magick_2.7.3           IRanges_2.28.0        
##  [79] generics_0.1.0         DBI_1.1.1              pillar_1.7.0          
##  [82] haven_2.5.0            withr_2.5.0            mgcv_1.8-40           
##  [85] abind_1.4-5            RCurl_1.98-1.7         modelr_0.1.8          
##  [88] crayon_1.4.1           car_3.0-13             utf8_1.2.2            
##  [91] tzdb_0.3.0             rmarkdown_2.14         GetoptLong_1.0.5      
##  [94] minpack.lm_1.2-1       reprex_2.0.1           digest_0.6.29         
##  [97] xtable_1.8-4           httpuv_1.6.5           stats4_4.1.1          
## [100] munsell_0.5.0          beeswarm_0.4.0         vipor_0.4.5           
## [103] bslib_0.3.1
##### Set up
lower_level_intra_data <- readRDS(paste0(datadir, "IntracellularExtracts/LowLevelDataAllProcessed.rds"))
bad_feat3 <- lower_level_intra_data[AreaFrac > 1000 & NumLabeledC > 0, sum(Status == "Contaminating mass"), by = FeatID] ## 
lower_level_intra_data[,BadFeat3:=ifelse(FeatID %in% bad_feat3[V1 > 11, FeatID], 1, 0)] ## contaminating in > 5% of 
lower_level_intra_data <- lower_level_intra_data[BadFeat3==0]
lower_level_intra_data <- lower_level_intra_data[is.na(AlwaysZero)]

feat_wide <- dcast(lower_level_intra_data, Name+MetID+Formula+Tags.y+AnnotationMolWt+RT+NumLabeledC+TotNumC+Mode+FracLabeled+CompID+FeatID~Sample2, value.var = "AreaFrac", fill = 0)
lower_level_data_long <- melt(feat_wide, id.vars = names(feat_wide)[1:12], variable.name = "Sample", value.name = "AreaFrac")
lower_level_data_long[,c("Strain", "AcetateGroup", "Replicate", "TimePoint"):=tstrsplit(Sample, split = "_")]
lower_level_data_long[,TimePoint:=case_when(
  TimePoint=="TP4i" ~ "TP3",
  TimePoint=="TP6i" ~ "TP5"
)]
lower_level_data_long[,AcetateGroup:=gsub("Ac", "", AcetateGroup)]
# Avg and SE across replicates
summary_tab2 <- lower_level_data_long[TimePoint == "TP5",list(mean(AreaFrac), median(AreaFrac), sd(AreaFrac)/sqrt(length(AreaFrac))), by=list(AcetateGroup, Strain, CompID, Name, Tags.y, NumLabeledC, FeatID, AnnotationMolWt, Formula)]
summary_tab2[,NumLabeledC2:=ifelse(NumLabeledC >= 9, "M+9 or more", paste0("M+", NumLabeledC))]


# Tot signal from labeled vs unlabeled
tot_labeled_unlabeled <- summary_tab2[,sum(V1), by=list(AcetateGroup, Strain, CompID, Name, Tags.y, NumLabeledC==0)]
summary_tab <- dcast(tot_labeled_unlabeled, AcetateGroup+Strain+CompID+Name+Tags.y~NumLabeledC, value.var = "V1")
setnames(summary_tab, c("TRUE", "FALSE"), c("Unlabeled", "Labeled"))
summary_tab[,sum(Labeled > 1000), by=CompID]
##                                                                                      CompID
##   1: Neg_2-Hydroxy-4-methylpentanoic acid_[M-H]-_LVRFTAZAXQPQHI-UHFFFAOYSA-N_132.0785_1.854
##   2:                                                 Neg_3-Phenyllactic acid_166.0628_2.249
##   3:                                                  Neg_5-Aminovaleric acid_117.0787_7.79
##   4:                                                        Neg_Azelaic acid_188.1046_1.448
##   5:                                                     Neg_Citraconic acid_130.0265_1.682
##  ---                                                                                       
## 177:                                                  Pos_Unknown_337.33446_338.3414_12.028
## 178:                                                   Pos_Unknown_347.15551_348.1662_7.328
## 179:                                                   Pos_Unknown_360.19367_361.1981_8.667
## 180:                                                      Pos_Valine_117.07898_118.086_7.78
## 181:                Pos_[Alanine_[M+H]+_QNAYBMKLOCPYGJ-REOHCLBHSA-N]_89.04768_90.0549_8.107
##      V1
##   1:  7
##   2: 11
##   3: 18
##   4:  3
##   5: 11
##  ---   
## 177: 12
## 178:  9
## 179:  5
## 180: NA
## 181:  6
summary_tab[is.na(Labeled), Labeled:=0]
summary_tab[is.na(Unlabeled), Unlabeled:=0]
unlabeled_comps <- summary_tab[Labeled < 1000 & Unlabeled > 10000 & AcetateGroup %in% c("1L", "10L") & Strain != "c", unique(CompID)]
unlabeled_comps2 <- summary_tab[Unlabeled > 2*Labeled & AcetateGroup %in% c("1L", "10L") & Strain != "c" , unique(CompID)]
ctrl_levels <- summary_tab[Strain == "c", list(mean(Labeled), mean(Unlabeled)), by=list(CompID, AcetateGroup)]
setnames(ctrl_levels, c("V1", "V2"), c("ctrlLabeled", "ctrlUnlabeled"))
summary_tab <- merge(summary_tab, ctrl_levels, by = c("CompID", "AcetateGroup"), all.x = T)

unlabeled_comps3 <- summary_tab[Labeled < 1000 & Unlabeled > 10000 & AcetateGroup %in% c("1L", "10L") & Strain != "c" & (Labeled+Unlabeled)/(ctrlLabeled+ctrlUnlabeled) > 1.3, unique(CompID)]


#### Define compounds to plot
top_comps2 <- summary_tab[Strain == "2243" & AcetateGroup %in% c("1L", "10L") & 
                            Labeled/(Labeled+Unlabeled) > 0.15 & Labeled > 10000, unique(CompID)]

bad_comps <- summary_tab[Strain != "c" & !AcetateGroup %in% c("1L", "10L") & Labeled/(Labeled+Unlabeled) > 0.5 , unique(CompID)]
top_comps2 <- top_comps2[!top_comps2 %in% bad_comps]

#What labeled comps are added by other species?
top_comps2_otherspec <- summary_tab[Strain %in% c("AB8n2", "Valencia") & AcetateGroup %in% c("1L", "10L") & Labeled/(Labeled+Unlabeled) > 0.15 & Labeled > 10000, unique(CompID)]
top_comps2_otherspec <- top_comps2_otherspec[!top_comps2_otherspec %in% bad_comps]
length(top_comps2_otherspec[!top_comps2_otherspec %in% top_comps2])
## [1] 4
top_comps_ab8 <- summary_tab[Strain == "AB8n2" & AcetateGroup %in% c("1L", "10L") & 
                            Labeled/(Labeled+Unlabeled) > 0.15 & Labeled > 10000, unique(CompID)]

top_comps_ab8 <- top_comps_ab8[!top_comps_ab8 %in% bad_comps]

top_comps_val <- summary_tab[Strain == "Valencia" & AcetateGroup %in% c("1L", "10L") & 
                               Labeled/(Labeled+Unlabeled) > 0.15 & Labeled > 10000, unique(CompID)]

top_comps_val <- top_comps_val[!top_comps_val %in% bad_comps]


top_data_intra <- summary_tab2[CompID %in% top_comps2 & AcetateGroup %in% c("1L", "10L")]
top_data_intra[,Class:=case_when(
  grepl("NAD", CompID) ~ "Vitamins and cofactors",
  grepl("UDP", CompID) ~ "Other",
  grepl("Thymidine", CompID) ~ "Nucleic acids",
  grepl("Acetyl", CompID) ~ "Other",
  grepl("Glucose", CompID) ~ "Carbohydrates",
  grepl("Orotic", CompID) ~ "Nucleic acids",
  grepl("Gluta", CompID) ~ "Peptides and amino acids",
  grepl("Alanine", CompID) ~ "Peptides and amino acids"
)]

top_data_intra[,Name2:=gsub("^[A-Za-z]+_", "", CompID)]
comp_order <- unique(top_data_intra[,list(Name2, Class)])[order(Class), Name2]

color_scale2 <- c("gray", brewer.pal(9, "YlGnBu"))
names(color_scale2) <- top_data_intra[Strain %in% c("c", "2243") & !(grepl("N-Acetyl", CompID)|grepl("Acetylarg", CompID)|grepl("Pos_UDP-N-acetyl", CompID)),sort(unique(NumLabeledC2))]

intracellular_plot5 <- ggplot(top_data_intra[Strain %in% c("2243") & !grepl("known", CompID) & !(grepl("N-Acetyl", CompID)|grepl("Acetylarg", CompID)|grepl("Pos_UDP-N-acetyl", CompID)) ], 
                              aes(x = factor(AcetateGroup, levels = c("1L", "10L")), y = V1, fill = NumLabeledC2)) + 
  geom_bar(stat = "identity", position = "fill", color = "black") + facet_wrap(factor(Name2, levels = comp_order)~., ncol = 9) + 
  theme_classic2() + theme(strip.text = element_text(angle =0, size = 9, hjust = 0, face = "bold"), axis.text = element_text(size = 7), strip.background = element_blank(), legend.position = "bottom")+ 
  scale_fill_manual(values = color_scale2, guide = guide_legend(), name = "")+ scale_x_discrete(labels = c("1mM", "10mM"), name = "") +
  ylab("Share of Peak Area")

ggsave(intracellular_plot5, file = paste0(outdir, "intracellular_sil_2243_ID_summary_2rows.pdf"), width = 8.5, height = 2.6)

intracellular_plot5

summary_tab2[CompID %in% top_comps2 & Strain %in% c("c", "2243") & grepl("known", CompID) & AcetateGroup %in% c("1L", "10L") , length(unique(CompID))]
## [1] 7
###### What frac of UDP is labeled?             
summary_tab[grepl("UDP", CompID) & Strain == "2243" & AcetateGroup %in% c("1L", "10L"), mean(Labeled/(Unlabeled+Labeled)), by = AcetateGroup]
##    AcetateGroup        V1
## 1:          10L 1.0000000
## 2:           1L 0.9758363
### Combined AB8, Val, 2243 plot
combined_dat <- summary_tab2[AcetateGroup %in% c("1L", "10L")& Strain != "c" & CompID %in% c(top_comps2, top_comps_ab8, top_comps_val) & !(grepl("N-Acetyl", CompID)|grepl("Acetylarg", CompID)|grepl("Pos_UDP-N-acetyl", CompID)) ]
combined_dat[,CompLab:=gsub("Labeled ", "", CompID)]
combined_dat[grepl("Alanine", CompID), CompLab:="Pos_Alanine_89.04768_90.0549_8.107"]
combined_dat[,CompLab:=ifelse(grepl("Neg", CompID), paste0(gsub("Neg_", "", CompLab), "_Neg"), 
                              paste0(gsub("Pos_", "", CompLab), "_Pos"))]
combined_dat <- combined_dat[!CompLab %in% c("Unknown_129.0424_8.79_Neg" , "Unknown_129.04259_130.0498_8.662_Pos")] ## Redundant
intracellular_3strain <- ggplot(combined_dat, 
                                aes(x = factor(AcetateGroup, levels = c("1L", "10L")), y = V1, fill = NumLabeledC2)) + 
  geom_bar(stat = "identity", position = "fill", color = "black") + facet_grid(Strain~CompLab, switch = "y") + 
  theme_classic2() + theme(strip.placement = "outside", strip.text.y = element_text(angle = 0, size = 9, hjust = 0, face = "bold"), 
                           strip.text.x = element_text(size = 7), axis.text = element_text(size = 7), strip.background = element_blank(), 
                           legend.position = "bottom") + 
  scale_fill_manual(values = color_scale2, guide = guide_legend(), name = "")+ scale_x_discrete(labels = c("1mM", "10mM"), name = "") +
  ylab("Share of Peak Area")
ggsave(intracellular_3strain, file = paste0(outdir, "intracellular_sil_3strains.pdf"), width = 9, height = 4.5)

combined_dat[,MSI:=gsub(";", "", gsub("Labeled", "", Tags.y))]
combined_dat[MSI == "NA", MSI:=""]
intracellular_3strain_id <- ggplot(combined_dat[!grepl("known", CompLab)], 
                                aes(x = factor(AcetateGroup, levels = c("1L", "10L")), y = V1, fill = NumLabeledC2)) + 
  geom_bar(stat = "identity", position = "fill", color = "black") + facet_grid(Strain~paste0(Name, " (", MSI, ")"), switch = "y") + 
  theme_classic2() + theme(strip.placement = "outside", strip.text.y = element_text(angle = 0, size = 9, hjust = 0, face = "bold"), 
                           strip.text.x = element_text(size = 7), axis.text = element_text(size = 7), strip.background = element_blank(), 
                           legend.position = "bottom") + 
  scale_fill_manual(values = color_scale2, guide = guide_legend(), name = "")+ scale_x_discrete(labels = c("1mM", "10mM"), name = "") +
  ylab("Share of Peak Area")

ggsave(intracellular_3strain_id, file = paste0(outdir, "intracellular_sil_3strains_id.pdf"), width = 7, height = 4)

intracellular_3strain_id

intracellular_3strain_unknown <- ggplot(combined_dat[grepl("known", CompLab)], 
                                   aes(x = factor(AcetateGroup, levels = c("1L", "10L")), y = V1, fill = NumLabeledC2)) + 
  geom_bar(stat = "identity", position = "fill", color = "black") + facet_grid(Strain~CompLab, switch = "y") + 
  theme_classic2() + theme(strip.placement = "outside", strip.text.y = element_text(angle = 0, size = 9, hjust = 0, face = "bold"), 
                           strip.text.x = element_text(size = 7), axis.text = element_text(size = 7), strip.background = element_blank(), 
                           legend.position = "bottom") + 
  scale_fill_manual(values = color_scale2, guide = guide_legend(), name = "")+ scale_x_discrete(labels = c("1mM", "10mM"), name = "") +
  ylab("Share of Peak Area")

ggsave(intracellular_3strain_unknown, file = paste0(outdir, "intracellular_sil_3strains_unknown.pdf"), width = 7, height = 4)

intracellular_3strain_unknown

#### Supp table
tab_save <- summary_tab2[(CompID %in% c(top_comps2, top_comps_val, top_comps_ab8)) & Strain != "c" & AcetateGroup %in% c("1L", "10L")][V1 != 0]
setnames(tab_save, c("V1", "V3"), c("MeanPeakArea", "SEPeakArea"))
tab_save[,MID:=MeanPeakArea/sum(MeanPeakArea), by = list(CompID, Strain, AcetateGroup)]
tab_save[,Strain:=case_when(
  Strain == "AB8" ~ "AB8n2",
  Strain == "Val" ~ "Valencia",
  TRUE ~ Strain
)]
good_isos <- tab_save[,sum(MeanPeakArea), by=list(CompID, NumLabeledC)][V1 > 50000]

tab_save <- dcast(tab_save[paste0(CompID, NumLabeledC) %in% good_isos[,paste0(CompID, NumLabeledC)]], CompID+Tags.y+Formula+Strain ~ paste0("M+", NumLabeledC) + AcetateGroup, value.var = c("MeanPeakArea", "SEPeakArea", "MID"))
names(tab_save)[grepl("MeanPeakArea", names(tab_save))] <- paste0(gsub("MeanPeakArea_", "", names(tab_save)[grepl("MeanPeakArea", names(tab_save))]), "_Mean Peak Area")
names(tab_save)[grepl("SEPeakArea", names(tab_save))] <- paste0(gsub("SEPeakArea_", "", names(tab_save)[grepl("SEPeakArea", names(tab_save))]), "_SE Peak Area")
names(tab_save)[grepl("MID", names(tab_save))] <- paste0(gsub("MID_", "", names(tab_save)[grepl("MID", names(tab_save))]), "_MID")
tab_save[,MolWt:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(foo[3])
})]
tab_save[,RT:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(foo[length(foo)])
})]
tab_save[grepl("Alanine", CompID), MolWt:="89.04768"]
tab_save[,CompID:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(paste0(foo[2], "_", foo[1]))
})]
tab_save <- tab_save[,c("Strain", "CompID", "Tags.y", "MolWt", "Formula", "RT", sort(names(tab_save)[7:ncol(tab_save)])), with=F]
fwrite(tab_save[order(Strain, CompID)], file = paste0(outdir, "intracellular_ac_labeling_summary.tsv"), sep = "\t")


rm(list = ls())

5 Figure 2/Figure S4 - acetate SIRM experiment, intracellular data

########## Extracellular
datadir <- "../SILAcetateTimeCourse/"
datadir2 <- "../"
outdir <- "figure2/"

low_level_data <- readRDS(paste0(datadir2, "LowLevelDataAllProcessedFixedAcetate.rds"))
top_data <- readRDS(paste0(datadir2, "SILprocessedTopLevelDataReplicatesFixedAcetate.rds"))
top_data_mean <- readRDS(paste0(datadir2, "SILprocessedTopLevelDataMeansFixedAcetate.rds"))

low_level_data[,Strain2:=factor(Strain, levels = c("c", "2243", "AB8", "Val"))]
levels(low_level_data$Strain2) <- c("Ctrl", "2243", "AB8n2", "Valencia")

bad_feat2 <- low_level_data[!grepl("L", AcetateGroup) & AreaFrac > 1e6 & NumLabeledC > 0, list(CompID, ExLabel, NumLabeledC, FeatID)][,length(NumLabeledC), by=list(CompID, ExLabel, FeatID)][V1 > 1]
bad_feats <- low_level_data[BadFeat==1,list(CompID, ExLabel, NumLabeledC)][,length(NumLabeledC), by=list(CompID, ExLabel)][V1 > 1]
bad_feats[,BadFeatAll:=1]

most_signal <- low_level_data[TimePoint=="TP8" & Strain != "c",sum(AreaFrac*NumLabeledC), 
                              by = list(Strain, AcetateGroup, TimePoint, Time, CompID)][,max(V1), by=CompID][order(V1, decreasing = T)][1:7, CompID]

color_scale = c("gray", paletteer::paletteer_dynamic("cartography::turquoise.pal", 16))
names(color_scale) = c(seq(0, 15), 17)
color_scale2 <- c("gray", brewer.pal(9, "YlGnBu"))
low_level_data[,NumLabeledC2:=ifelse(NumLabeledC >= 9, "M+9 or more", paste0("M+", NumLabeledC))]
# Avg and SE across replicates

summary_tab2ex <- low_level_data[Sample2 != "c_10L_TP8_1",list(mean(AreaFrac), sd(AreaFrac)/sqrt(length(AreaFrac))), by=list(AcetateGroup, Strain, CompID, Name, ExLabel, FeatID, TimePoint, Time, MSI, MolWt, Formula)]
summary_tab2ex[,NumLabeledC:=as.numeric(gsub("ExRate", "", ExLabel))]
tot_labeled_unlabeled_ex <- summary_tab2ex[TimePoint=="TP8",sum(V1), by=list(AcetateGroup, Strain, CompID, Name, ExLabel=="ExRate0")]
summary_tab_ex <- dcast(tot_labeled_unlabeled_ex, AcetateGroup+Strain+CompID+Name~ExLabel, value.var = "V1")
setnames(summary_tab_ex, c("TRUE", "FALSE"), c("Unlabeled", "Labeled"))

#### Define top comps
top_comps2ex <- summary_tab_ex[Strain == "2243" & AcetateGroup %in% c( "10L") & Labeled/(Labeled+Unlabeled) > 0.5 & 
                                 Labeled > 50000, unique(CompID)]

bad_comps_ex <- summary_tab_ex[Strain != "c" & !AcetateGroup %in% c("1L", "10L") & Labeled/(Labeled+Unlabeled) > 0.5 & Labeled > 1e5, unique(CompID)]
top_comps2ex <- top_comps2ex[!top_comps2ex %in% bad_comps_ex]

top_comps2ex_otherspec <- summary_tab_ex[Strain %in% c("AB8", "Val") & AcetateGroup %in% c( "10L") & Labeled/(Labeled+Unlabeled) > 0.5 & Labeled > 50000, unique(CompID)]
top_comps2ex_otherspec <- top_comps2ex_otherspec[!top_comps2ex_otherspec %in% bad_comps_ex]
length(top_comps2ex_otherspec[!top_comps2ex_otherspec %in% top_comps2ex])
## [1] 2
# Get rid of duplicates
top_comps2ex_sub <- top_comps2ex[!top_comps2ex %in% c("ACETYL ARGININE_Neg", "D-(-)-Glutamine_Neg", 
                                                      "L-Glutamic acid_Neg", "N-Acetylornithine_Neg",
                                                      "N-Acetyltryptophan_Neg", "Pantothenic acid_Neg")]

color_scale2 <- c("gray", brewer.pal(9, "YlGnBu"))
summary_tab2ex[,NumLabeledC2:=ifelse(NumLabeledC >= 9, "M+9 or more", paste0("M+", NumLabeledC))]
names(color_scale2) <- summary_tab2ex[,sort(unique(NumLabeledC2))]
color_scale_sub <- color_scale2[names(color_scale2) %in% summary_tab2ex[CompID %in% top_comps2ex & Strain == 2243 & AcetateGroup %in% c("10L") & !grepl("myristic", CompID) & !grepl("ASCORBIC", CompID) & V1 != 0, NumLabeledC2]]

summary_tab2ex[CompID=="Acetylarginine_Pos", CompID:="N-Acetyl-arginine_Pos"]

dup_comps <- c("D-(-)-Glutamine_Neg", "N-Acetylornithine_Neg", "L-Glutamic acid_Neg", "N-Acetyltryptophan_Neg", "Pantothenic acid_Neg")
extracellular_plot5 <- ggplot(summary_tab2ex[!CompID %in% dup_comps & (CompID %in% top_comps2ex_sub|CompID %in% top_comps2ex_otherspec)& Strain != "c" & AcetateGroup %in% c("10L") & !grepl("ASCORBIC", CompID) & V1 != 0], 
                              aes(x = factor(paste0(Strain, " ", Time)), y = V1, fill = NumLabeledC2)) + 
  geom_vline(xintercept = 9.5, size = 0.3, linetype = 2) + geom_vline(xintercept = 18.5, size = 0.3, linetype = 2) +
  geom_bar(stat = "identity", position = "stack", color = "black", size = 0.1) + facet_wrap(.~paste0(Name, " (", MSI, ")"), scales = "free_y", ncol = 6) + 
  theme_classic2() + theme(axis.ticks.x = element_blank(), strip.text = element_text(angle =0, size = 9, hjust = 0), 
                           axis.text.y = element_text(size = 5), axis.text.x = element_text(size = 8, face = "bold"), 
                           strip.background= element_blank(), legend.position = "bottom")+ 
  scale_fill_manual(values = color_scale_sub, guide = guide_legend(), name = "")+ scale_y_continuous(labels = scales::scientific, name = "Mean Peak Area") +
  xlab("Time point") + 
  scale_x_discrete(labels = c("", "", "","", "2243", rep("", 8), "AB8n2", rep("", 8), "Valencia", rep("", 4))) 

ggsave(extracellular_plot5, file = paste0(outdir, "extracellular_SIL_summaryIDcomps_allStrains.pdf"), width= 8.5, height = 5)

extracellular_plot5 

##### Extracellular compound summary (supp table)
tab_save <- summary_tab2ex[(CompID %in% top_comps2ex_sub|CompID %in% top_comps2ex_otherspec) & Strain != "c" & TimePoint == "TP8" & AcetateGroup %in% c("1L", "10L")][V1 != 0]
setnames(tab_save, c("V1", "V2"), c("MeanPeakArea", "SEPeakArea"))
tab_save[,MID:=MeanPeakArea/sum(MeanPeakArea), by = list(CompID, Strain, AcetateGroup)]
tab_save[,Strain:=case_when(
  Strain == "AB8" ~ "AB8n2",
  Strain == "Val" ~ "Valencia",
  TRUE ~ Strain
)]
good_isos <- tab_save[,sum(MeanPeakArea), by=list(CompID, NumLabeledC)][V1 > 50000]

tab_save <- dcast(tab_save[paste0(CompID, NumLabeledC) %in% good_isos[,paste0(CompID, NumLabeledC)]], CompID+MolWt+Formula+MSI+Strain ~ paste0("M+", NumLabeledC) + AcetateGroup, value.var = c("MeanPeakArea", "SEPeakArea", "MID"))
names(tab_save)[grepl("MeanPeakArea", names(tab_save))] <- paste0(gsub("MeanPeakArea_", "", names(tab_save)[grepl("MeanPeakArea", names(tab_save))]), "_Mean Peak Area")
names(tab_save)[grepl("SEPeakArea", names(tab_save))] <- paste0(gsub("SEPeakArea_", "", names(tab_save)[grepl("SEPeakArea", names(tab_save))]), "_SE Peak Area")
names(tab_save)[grepl("MID", names(tab_save))] <- paste0(gsub("MID_", "", names(tab_save)[grepl("MID", names(tab_save))]), "_MID")
tab_save <- tab_save[,c("Strain", "CompID", "MolWt", "Formula", "MSI", sort(names(tab_save)[4:ncol(tab_save)])), with=F]
fwrite(tab_save[order(Strain, CompID)], file = paste0(outdir, "extracellular_ac_labeling_summary.tsv"), sep = "\t")


###### Count of labeled compounds

low_level_data[,TubeSample2:=paste0(Strain, "_", AcetateGroup2, "_", Replicate)]
low_level_data[,BadCtrl:=ifelse(TubeSample2 %in% c("c_10L_1", "c_1L_3"), 1, 0)]
bad_feat3 <- low_level_data[AreaFrac > 0 & NumLabeledC > 0, sum(Status == "Contaminating mass"), by = FeatID] ## 
low_level_data[,BadFeat3:=ifelse(FeatID %in% bad_feat3[V1 > 16, FeatID], 1, 0)] ## contaminating in 
## Remove labeled features present at high abund in first time point, not relevant
bad_feat4 <- low_level_data[NumLabeledC > 0 & (Time < 15|(Strain == "c" & Time < 30)) & AreaFrac > 5e5, unique(FeatID)]
#low_level_data <- low_level_data[!FeatID %in% bad_feat4]

labeled_compounds <- low_level_data[BadFeat==0 & BadFeat3 == 0 & BadCtrl == 0 & !FeatID %in% bad_feat4 & NumLabeledC > 0 & !Status %in% c("Contaminating mass", "Low pattern fit"), sum(AreaFrac), by = list(Name, CompID, Strain, AcetateGroup2, TubeSample,TimePoint, Sample, MSI, Formula, Replicate, Time, TotNumC)]

labeled_compounds_count <- labeled_compounds[,length(unique(CompID[V1 > 100000])), by = list(Strain, AcetateGroup2,TimePoint, Time, Replicate)]


ac_group_scale <- brewer.pal(5, "Paired")[c(5, 1:4)]
names(ac_group_scale) <- c("0", "1", "1L", "10", "10L")

labeled_compounds_comp_mean <- labeled_compounds_count[,list(mean(V1), sd(V1)/sqrt(length(V1))), by = list(Strain, AcetateGroup2, TimePoint, Time)]
labeled_compound_plot <- ggplot(labeled_compounds_comp_mean[Strain %in% c(2243, "c")], aes(x = Time, y = V1, color = AcetateGroup2)) + facet_wrap(~ifelse(Strain == "c", "Sterile ctrls", "E. lenta DSM 2243")) + geom_line() + geom_errorbar(width = 0, aes(ymin = V1-V2, ymax = V1+V2)) + geom_point() + scale_color_manual(values = ac_group_scale, labels = c("0 Ac", "1mM Ac", "1mM labeled Ac", "10mM Ac", "10mM labeled Ac"), name = "") + theme(legend.position = "right", strip.text = element_text(face = "bold"), strip.background = element_blank()) + xlab("Time (hr)") + ylab("Number of labeled\ncompounds detected\nby LC-MS/MS")

labeled_compound_plot

ggsave(labeled_compound_plot, file = paste0(outdir, "sil_2243_labeled_comp_plot.pdf"), width = 5.5, height = 2.35)


############### Growth plot
od_data = data.table(read_xlsx(paste0(datadir, "ODdata.xlsx")))
od_data[,TP9_64:=round(as.numeric(TP9_64), digits = 3)] #fix weirdness
od_data = melt(od_data, id.var = c("Strain", "AcConc", "Replicate"), variable.name = "TimePoint")
setnames(od_data, "AcConc", "AcetateGroup")
od_data[,AcConc:=gsub("L", "", AcetateGroup)]
od_data[,Labeled:=ifelse(grepl("L", AcetateGroup), "SIL", "Unlabeled")]
setnames(od_data, "Strain", "StrainFull")
od_data[,Strain:=ifelse(StrainFull=="Valencia", "Val", StrainFull)]
od_data[,Strain:=ifelse(Strain=="AB8n2", "AB8", Strain)]
od_data[,TimePoint:=gsub("_[0-9.]+$", "", TimePoint)]
od_data[,TimePoint:=paste0("TP", as.numeric(gsub("TP", "", TimePoint))-1)]
od_data[,Replicate:=as.character(Replicate)]
setnames(od_data, "value", "OD600")
time_pt_tab <- data.table(TimePoint=paste0("TP", 0:8), Time = c(0, 15, 18.5, 23,28.5, 39, 43, 47.5, 64))
od_data <- merge(od_data, time_pt_tab, by = "TimePoint", all.x = T)
mean_od_data <- od_data[,list(mean(OD600, na.rm = T), sd(OD600, na.rm = T)/sqrt(length(OD600[!is.na(OD600)]))), by=list(Time, TimePoint, Strain, StrainFull, AcConc, AcetateGroup)]

od_plot <- ggplot(mean_od_data, aes(x = Time, y = V1, color = AcetateGroup)) + geom_line() + geom_point() + 
  facet_wrap(~StrainFull) +
  geom_errorbar(aes(ymin = V1-V2, ymax = V1+V2, width = 0)) + ylab("Cell density (OD600)") + xlab("Time (hr)") +
  theme(legend.position = "right") + scale_color_manual(values = ac_group_scale, labels = c("0 acetate", "1mM acetate", "1mM labeled acetate", "10mM acetate", "10mM labeled acetate"), name = "")

ggsave(od_plot, file = paste0(outdir, "sil_growth.pdf"), width = 7.5, height = 2.4)

od_plot

6 Figure S3 - Targeted acetate quantification

###### Targeted acetate Figure S5
rm(list = ls())
library(data.table)
library(tidyverse)
library(readxl)
library(ggrepel)
library(ggpubr)
library(emmeans)
library(RColorBrewer)
datadir <- "processedDatasets/"
outdir <- "figure2/"
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] emmeans_1.7.5         paletteer_1.4.0       santaR_1.2.3         
##  [4] lubridate_1.8.0       ggbeeswarm_0.6.0      growthcurver_0.3.1   
##  [7] KEGGREST_1.32.0       patchwork_1.1.1       ggrepel_0.9.1        
## [10] ComplexHeatmap_2.13.1 ggpubr_0.4.0          RColorBrewer_1.1-2   
## [13] fastcluster_1.2.3     vegan_2.6-2           lattice_0.20-45      
## [16] permute_0.9-5         webchem_1.1.2         forcats_0.5.1        
## [19] stringr_1.4.0         dplyr_1.0.7           purrr_0.3.4          
## [22] readr_2.1.2           tidyr_1.2.0           tibble_3.1.7         
## [25] tidyverse_1.3.1       readxl_1.3.1          cowplot_1.1.1        
## [28] ggplot2_3.3.6         data.table_1.14.2    
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.2.1        circlize_0.4.15        splines_4.1.1         
##   [4] GenomeInfoDb_1.30.1    digest_0.6.29          foreach_1.5.2         
##   [7] htmltools_0.5.2        magick_2.7.3           fansi_1.0.3           
##  [10] magrittr_2.0.3         cluster_2.1.3          doParallel_1.0.17     
##  [13] tzdb_0.3.0             Biostrings_2.62.0      modelr_0.1.8          
##  [16] matrixStats_0.62.0     colorspace_2.0-3       rvest_1.0.2           
##  [19] haven_2.5.0            xfun_0.31              crayon_1.4.1          
##  [22] RCurl_1.98-1.7         prismatic_1.1.0        jsonlite_1.8.0        
##  [25] iterators_1.0.13       glue_1.6.2             gtable_0.3.0          
##  [28] zlibbioc_1.40.0        XVector_0.34.0         GetoptLong_1.0.5      
##  [31] car_3.0-13             shape_1.4.6            BiocGenerics_0.40.0   
##  [34] abind_1.4-5            scales_1.1.1           mvtnorm_1.1-3         
##  [37] DBI_1.1.1              data.tree_1.0.0        rstatix_0.7.0         
##  [40] Rcpp_1.0.8.3           xtable_1.8-4           clue_0.3-61           
##  [43] stats4_4.1.1           httr_1.4.2             ellipsis_0.3.2        
##  [46] pkgconfig_2.0.3        farver_2.1.0           sass_0.4.1            
##  [49] dbplyr_2.1.1           utf8_1.2.2             tidyselect_1.1.2      
##  [52] labeling_0.4.2         rlang_1.0.2            later_1.3.0           
##  [55] munsell_0.5.0          cellranger_1.1.0       tools_4.1.1           
##  [58] cli_3.3.0              generics_0.1.0         broom_0.8.0           
##  [61] evaluate_0.15          fastmap_1.1.0          yaml_2.3.5            
##  [64] rematch2_2.1.2         knitr_1.39             fs_1.5.0              
##  [67] nlme_3.1-159           mime_0.12              xml2_1.3.2            
##  [70] compiler_4.1.1         rstudioapi_0.13        beeswarm_0.4.0        
##  [73] png_0.1-7              ggsignif_0.6.3         reprex_2.0.1          
##  [76] bslib_0.3.1            stringi_1.7.6          highr_0.9             
##  [79] Matrix_1.4-1           vctrs_0.4.1            pillar_1.7.0          
##  [82] lifecycle_1.0.0        jquerylib_0.1.4        GlobalOptions_0.1.2   
##  [85] estimability_1.4       bitops_1.0-7           httpuv_1.6.5          
##  [88] R6_2.5.1               promises_1.2.0.1       vipor_0.4.5           
##  [91] IRanges_2.28.0         codetools_0.2-18       MASS_7.3-57           
##  [94] assertthat_0.2.1       rjson_0.2.21           minpack.lm_1.2-1      
##  [97] withr_2.5.0            S4Vectors_0.32.4       GenomeInfoDbData_1.2.7
## [100] mgcv_1.8-40            parallel_4.1.1         hms_1.1.1             
## [103] coda_0.19-4            rmarkdown_2.14         carData_3.0-5         
## [106] Cairo_1.6-0            shiny_1.6.0
met_data <- fread(paste0(datadir, "targeted_met_data.csv"))
met_data[,Accuracy:=as.numeric(Accuracy)]
met_data[S_N=="∞", S_N:="Inf"]
met_data[,S_N:=as.numeric(S_N)]
met_data[,mean(S_N, na.rm = T), by=Compound]
##              Compound         V1
##  1: 2-Me-Butyric acid 138544.389
##  2:           Acetate 134242.122
##  3:        Acetate-d4        Inf
##  4:           Alanine        Inf
##  5:               Arg        Inf
##  6:     Aspartic acid        Inf
##  7:          Butanate        Inf
##  8:        Citrulline        Inf
##  9:         Glutamine        Inf
## 10:           Glycine        Inf
## 11:               IAA  10583.213
## 12:               ILA   7267.553
## 13:       Isobutanate 161849.719
## 14:   Isocaproic acid 542197.851
## 15:       Isovalerate 162773.168
## 16:       Lactic acid        Inf
## 17:         Ornithine        Inf
## 18:        Propionate        Inf
## 19:         Succinate 137001.752
## 20:               Trp   9378.787
## Questions
# How to interpret S_N
met_data[!is.na(Strain) & S_N < 10, table(Compound)]
## Compound
## 2-Me-Butyric acid        Acetate-d4           Alanine     Aspartic acid 
##                18                76                 6                36 
##          Butanate        Citrulline         Glutamine           Glycine 
##                59                 6                 3                 7 
##               IAA               ILA       Isobutanate   Isocaproic acid 
##                18                77                28                13 
##       Isovalerate       Lactic acid         Ornithine        Propionate 
##                22                14                31                33 
##         Succinate               Trp 
##                70                 2
met_data[!is.na(Strain),length(unique(Sample))]
## [1] 83
met_data[,LLOQ:=as.numeric(LLOQ)]
met_data[,ULOQ:=as.numeric(ULOQ)]
met_data[,BelowLLOQ:=ifelse(mM < LLOQ, 1, 0)]
met_data[,AboveULOQ:=ifelse(mM > ULOQ, 1, 0)]
met_data[,BadS_N:=ifelse(S_N < 10, 1, 0)]
met_data[BelowLLOQ==1 & S_N > 1e4]
##     Strain AcetateConc TimePoint Replicate well      Compound MS_Sample
##  1:                 NA                  NA          Glutamine          
##  2:                 NA                  NA            Glycine          
##  3:                 NA                  NA        Lactic acid          
##  4:   2243           0       TP4         1   E2     Glutamine      S.E2
##  5:   2243           0       TP4         2   E5 Aspartic acid      S.E5
##  6:   2243           0       TP4         3   B3 Aspartic acid      S.B3
##  7:   2243           0       TP7         1   E7 Aspartic acid      S.E7
##  8:   2243           0       TP7         2   E9    Acetate-d4      S.E9
##  9:   2243           0       TP7         3   E3 Aspartic acid      S.E3
## 10:   2243          10       TP4         3   G5 Aspartic acid      S.G5
## 11:   2243          10       TP4         3   G5     Glutamine      S.G5
## 12:   2243          10       TP7         1   C7 Aspartic acid      S.C7
## 13:   2243           1       TP4         1   A2     Glutamine      S.A2
## 14:   2243           1       TP7         2   A9       Alanine      S.A9
## 15:   2243          10       TP8         2  C11 Aspartic acid     S.C11
## 16:   2243          10       TP8         2  C11      Butanate     S.C11
## 17:    AB8           0       TP4         1   F2       Alanine      S.F2
## 18:    AB8           0       TP4         1   F2     Glutamine      S.F2
## 19:    AB8           0       TP4         2   F5       Alanine      S.F5
## 20:    AB8           0       TP4         3   C3 Aspartic acid      S.C3
## 21:    AB8           0       TP7         1   F7 Aspartic acid      S.F7
## 22:    AB8           0       TP7         2   F9 Aspartic acid      S.F9
## 23:    AB8           0       TP7         3   F3     Glutamine      S.F3
## 24:    AB8          10       TP4         1   D2 Aspartic acid      S.D2
## 25:    AB8          10       TP4         2   D5       Alanine      S.D5
## 26:    AB8          10       TP7         1   D7       Alanine      S.D7
## 27:    AB8          10       TP7         1   D7 Aspartic acid      S.D7
## 28:    AB8          10       TP7         2   D9 Aspartic acid      S.D9
## 29:    AB8           1       TP7         3   H9     Glutamine      S.H9
## 30:    AB8           1       TP8         1  B10           Arg     S.B10
## 31:    AB8           1       TP8         2  D10           Arg     S.D10
## 32:    AB8           1       TP8         2  D10 Aspartic acid     S.D10
## 33:    AB8           1       TP8         3  F10           Arg     S.F10
## 34:    AB8           1       TP8         3  F10 Aspartic acid     S.F10
## 35:    AB8           1       TP4         1   B2       Alanine      S.B2
## 36:    AB8           1       TP4         1   B2 Aspartic acid      S.B2
## 37:    AB8           1       TP4         1   B2     Glutamine      S.B2
## 38:    AB8           1       TP4         2   B5       Alanine      S.B5
## 39:    AB8           1       TP4         2   B5 Aspartic acid      S.B5
## 40:    AB8           1       TP4         2   B5    Propionate      S.B5
## 41:    AB8           1       TP4         3   H2       Alanine      S.H2
## 42:    AB8           1       TP4         3   H2   Lactic acid      S.H2
## 43:    AB8           1       TP7         1   B7       Alanine      S.B7
## 44:    AB8           1       TP7         1   B7     Glutamine      S.B7
## 45:    AB8           1       TP7         2   B9     Glutamine      S.B9
## 46:    AB8          10       TP8         1  B11       Alanine     S.B11
## 47:    AB8          10       TP8         3  F11       Alanine     S.F11
## 48:    AB8          10       TP8         3  F11           Arg     S.F11
## 49:    Val           0       TP4         2   F4 Aspartic acid      S.F4
## 50:    Val           0       TP4         2   F4     Glutamine      S.F4
## 51:    Val           0       TP4         3   A3 Aspartic acid      S.A3
## 52:    Val           0       TP4         3   A3     Glutamine      S.A3
## 53:    Val           0       TP7         1   F6       Alanine      S.F6
## 54:    Val           0       TP7         1   F6 Aspartic acid      S.F6
## 55:    Val           0       TP7         2   F8     Glutamine      S.F8
## 56:    Val           0       TP7         3   D3       Alanine      S.D3
## 57:    Val           0       TP7         3   D3 Aspartic acid      S.D3
## 58:    Val           0       TP7         3   D3   Lactic acid      S.D3
## 59:    Val          10       TP4         2   D4     Glutamine      S.D4
## 60:    Val          10       TP7         2   D8 Aspartic acid      S.D8
## 61:    Val          10       TP7         2   D8      Butanate      S.D8
## 62:    Val           1       TP7         3   G8 Aspartic acid      S.G8
## 63:    Val           1       TP4         1   B1     Glutamine      S.B1
## 64:    Val           1       TP4         2   B4 Aspartic acid      S.B4
## 65:    Val           1       TP4         3   G1    Acetate-d4      S.G1
## 66:    Val           1       TP7         2   B8 Aspartic acid      S.B8
## 67:      c           0       TP4         1   E1   Lactic acid      S.E1
## 68:      c           0       TP4         2   E4 Aspartic acid      S.E4
## 69:      c           0       TP4         2   E4   Lactic acid      S.E4
## 70:      c           0       TP4         2   E4    Propionate      S.E4
## 71:      c           0       TP7         1   E6 Aspartic acid      S.E6
## 72:      c           0       TP7         2   E8 Aspartic acid      S.E8
## 73:      c           0       TP7         2   E8    Propionate      S.E8
## 74:      c          10       TP4         1   C1 Aspartic acid      S.C1
## 75:      c          10       TP4         1   C1   Lactic acid      S.C1
## 76:      c           1       TP8         2  H10 Aspartic acid     S.H10
## 77:      c           1       TP4         2   A4    Citrulline      S.A4
## 78:      c          10       TP8         2  H11 Aspartic acid     S.H11
## 79:      c          10       TP8         2  H11   Lactic acid     S.H11
##     Strain AcetateConc TimePoint Replicate well      Compound MS_Sample
##         Location Accuracy     S_N   iSTD_Area          mM       LLOQ ULOQ Row
##  1: Std_PBSSt_09 133.9407     Inf   13317.555 0.007848088 0.01276341  1.5    
##  2: Std_PBSSt_06 127.1703     Inf  267555.886 0.059611080 0.07695009  1.5    
##  3: Std_PBSSt_09 126.4290     Inf 2847892.614 0.014815901 0.02365441  3.0    
##  4:       P1-E02       NA     Inf   13609.560 0.010380122 0.01276341  1.5   E
##  5:       P1-E05       NA     Inf   11519.064 0.000000000 0.10582321  1.5   E
##  6:       P1-B03       NA     Inf   10086.437 0.000000000 0.10582321  1.5   B
##  7:       P1-E07       NA     Inf   10216.210 0.000000000 0.10582321  1.5   E
##  8:       P1-E09       NA     Inf 3716180.478 0.020977126 0.23659610 30.0   E
##  9:       P1-E03       NA     Inf   12936.341 0.000000000 0.10582321  1.5   E
## 10:       P1-G05       NA     Inf    8455.305 0.000000000 0.10582321  1.5   G
## 11:       P1-G05       NA     Inf   13051.871 0.008988906 0.01276341  1.5   G
## 12:       P1-C07       NA     Inf    8940.437 0.000000000 0.10582321  1.5   C
## 13:       P1-A02       NA     Inf   13598.638 0.012228330 0.01276341  1.5   A
## 14:       P1-A09       NA     Inf  237354.466 0.034080913 0.04225643  1.5   A
## 15:       P1-C11       NA     Inf   11720.259 0.000000000 0.10582321  1.5   C
## 16:       P1-C11       NA     Inf 4139916.665 0.011578595 0.04182426  3.0   C
## 17:       P1-F02       NA     Inf  237172.273 0.000000000 0.04225643  1.5   F
## 18:       P1-F02       NA     Inf   13896.045 0.006668622 0.01276341  1.5   F
## 19:       P1-F05       NA     Inf  221704.533 0.000000000 0.04225643  1.5   F
## 20:       P1-C03       NA     Inf   11828.588 0.000000000 0.10582321  1.5   C
## 21:       P1-F07       NA     Inf    6116.839 0.000000000 0.10582321  1.5   F
## 22:       P1-F09       NA     Inf    9491.304 0.000000000 0.10582321  1.5   F
## 23:       P1-F03       NA     Inf   12522.156 0.008031104 0.01276341  1.5   F
## 24:       P1-D02       NA     Inf   12449.945 0.000000000 0.10582321  1.5   D
## 25:       P1-D05       NA     Inf  215118.742 0.000000000 0.04225643  1.5   D
## 26:       P1-D07       NA     Inf  238847.135 0.000000000 0.04225643  1.5   D
## 27:       P1-D07       NA     Inf   11397.935 0.000000000 0.10582321  1.5   D
## 28:       P1-D09       NA     Inf    9573.691 0.000000000 0.10582321  1.5   D
## 29:       P1-C12       NA     Inf   14281.698 0.012760079 0.01276341  1.5   H
## 30:       P1-B10       NA     Inf   13165.290 0.203157239 0.25452760 75.0   B
## 31:       P1-D10       NA     Inf   18900.441 0.218943341 0.25452760 75.0   D
## 32:       P1-D10       NA     Inf   10792.889 0.000000000 0.10582321  1.5   D
## 33:       P1-F10       NA     Inf   20038.595 0.209094601 0.25452760 75.0   F
## 34:       P1-F10       NA     Inf   10965.632 0.000000000 0.10582321  1.5   F
## 35:       P1-B02       NA     Inf  267477.513 0.000000000 0.04225643  1.5   B
## 36:       P1-B02       NA     Inf   10237.513 0.000000000 0.10582321  1.5   B
## 37:       P1-B02       NA     Inf   17466.593 0.003637131 0.01276341  1.5   B
## 38:       P1-B05       NA     Inf  239083.336 0.000000000 0.04225643  1.5   B
## 39:       P1-B05       NA     Inf   10388.202 0.000000000 0.10582321  1.5   B
## 40:       P1-B05       NA     Inf 4233599.429 0.000000000 0.03765327  3.0   B
## 41:       P1-G03       NA     Inf  242124.194 0.000000000 0.04225643  1.5   H
## 42:       P1-G03       NA     Inf 4639231.400 0.021531672 0.02365441  3.0   H
## 43:       P1-B07       NA     Inf  226236.422 0.000000000 0.04225643  1.5   B
## 44:       P1-B07       NA     Inf   12395.605 0.011970025 0.01276341  1.5   B
## 45:       P1-B09       NA     Inf   14023.541 0.011520241 0.01276341  1.5   B
## 46:       P1-B11       NA     Inf  207443.130 0.000000000 0.04225643  1.5   B
## 47:       P1-F11       NA     Inf  212096.498 0.000000000 0.04225643  1.5   F
## 48:       P1-F11       NA     Inf   19479.220 0.243873203 0.25452760 75.0   F
## 49:       P1-F04       NA     Inf    7779.508 0.000000000 0.10582321  1.5   F
## 50:       P1-F04       NA     Inf   12807.308 0.004922372 0.01276341  1.5   F
## 51:       P1-A03       NA     Inf    9502.729 0.000000000 0.10582321  1.5   A
## 52:       P1-A03       NA     Inf   13742.967 0.004083988 0.01276341  1.5   A
## 53:       P1-F06       NA     Inf  220093.700 0.000000000 0.04225643  1.5   F
## 54:       P1-F06       NA     Inf   12259.053 0.000000000 0.10582321  1.5   F
## 55:       P1-F08       NA     Inf   15491.828 0.011640307 0.01276341  1.5   F
## 56:       P1-D03       NA     Inf  234810.563 0.000000000 0.04225643  1.5   D
## 57:       P1-D03       NA     Inf   10652.036 0.000000000 0.10582321  1.5   D
## 58:       P1-D03       NA     Inf 4145786.201 0.020031942 0.02365441  3.0   D
## 59:       P1-D04       NA     Inf   13036.605 0.006170081 0.01276341  1.5   D
## 60:       P1-D08       NA     Inf    9566.066 0.000000000 0.10582321  1.5   D
## 61:       P1-D08       NA     Inf 3936465.219 0.007829900 0.04182426  3.0   D
## 62:       P1-G08       NA     Inf    8713.082 0.000000000 0.10582321  1.5   G
## 63:       P1-B01       NA     Inf   14346.205 0.006313181 0.01276341  1.5   B
## 64:       P1-B04       NA     Inf   12014.545 0.000000000 0.10582321  1.5   B
## 65:       P1-G01       NA     Inf 4556438.427 0.022587655 0.23659610 30.0   G
## 66:       P1-B08       NA     Inf    9955.895 0.000000000 0.10582321  1.5   B
## 67:       P1-E01       NA     Inf 4379147.683 0.003800583 0.02365441  3.0   E
## 68:       P1-E04       NA     Inf    8792.582 0.000000000 0.10582321  1.5   E
## 69:       P1-E04       NA     Inf 4375688.576 0.007344757 0.02365441  3.0   E
## 70:       P1-E04       NA 10409.7 4375688.576 0.000000000 0.03765327  3.0   E
## 71:       P1-E06       NA     Inf   11833.863 0.000000000 0.10582321  1.5   E
## 72:       P1-E08       NA     Inf    9980.063 0.000000000 0.10582321  1.5   E
## 73:       P1-E08       NA     Inf 4225784.075 0.000000000 0.03765327  3.0   E
## 74:       P1-C01       NA     Inf   12865.803 0.000000000 0.10582321  1.5   C
## 75:       P1-C01       NA     Inf 4509021.797 0.005571540 0.02365441  3.0   C
## 76:       P1-D12       NA     Inf   11855.931 0.000000000 0.10582321  1.5   H
## 77:       P1-A04       NA     Inf   15242.210 0.000000000 0.02753048 30.0   A
## 78:       P1-E12       NA     Inf   12602.132 0.000000000 0.10582321  1.5   H
## 79:       P1-E12       NA     Inf 4257567.360 0.011125312 0.02365441  3.0   H
##         Location Accuracy     S_N   iSTD_Area          mM       LLOQ ULOQ Row
##     Column        Sample OD600 Time BelowLLOQ AboveULOQ BadS_N
##  1:     NA                  NA   NA         1         0      0
##  2:     NA                  NA   NA         1         0      0
##  3:     NA                  NA   NA         1         0      0
##  4:      2  2243_0_TP4_1 0.162 28.5         1         0      0
##  5:      5  2243_0_TP4_2 0.167 28.5         1         0      0
##  6:      3  2243_0_TP4_3 0.142 28.5         1         0      0
##  7:      7  2243_0_TP7_1 0.396 47.5         1         0      0
##  8:      9  2243_0_TP7_2 0.395 47.5         1         0      0
##  9:      3  2243_0_TP7_3 0.299 47.5         1         0      0
## 10:      5  2243_1_TP4_3 0.212 28.5         1         0      0
## 11:      5  2243_1_TP4_3 0.212 28.5         1         0      0
## 12:      7  2243_1_TP7_1 0.583 47.5         1         0      0
## 13:      2 2243_10_TP4_1 0.174 28.5         1         0      0
## 14:      9 2243_10_TP7_2 0.733 47.5         1         0      0
## 15:     11 2243_10_TP8_2 0.907 64.0         1         0      0
## 16:     11 2243_10_TP8_2 0.907 64.0         1         0      0
## 17:      2   AB8_0_TP4_1 0.201 28.5         1         0      0
## 18:      2   AB8_0_TP4_1 0.201 28.5         1         0      0
## 19:      5   AB8_0_TP4_2 0.168 28.5         1         0      0
## 20:      3   AB8_0_TP4_3 0.150 28.5         1         0      0
## 21:      7   AB8_0_TP7_1 0.424 47.5         1         0      0
## 22:      9   AB8_0_TP7_2 0.324 47.5         1         0      0
## 23:      3   AB8_0_TP7_3 0.296 47.5         1         0      0
## 24:      2   AB8_1_TP4_1 0.351 28.5         1         0      0
## 25:      5   AB8_1_TP4_2 0.296 28.5         1         0      0
## 26:      7   AB8_1_TP7_1 0.992 47.5         1         0      0
## 27:      7   AB8_1_TP7_1 0.992 47.5         1         0      0
## 28:      9   AB8_1_TP7_2 0.876 47.5         1         0      0
## 29:      9   AB8_1_TP7_3 0.885 47.5         1         0      0
## 30:     10   AB8_1_TP8_1 0.947 64.0         1         0      0
## 31:     10   AB8_1_TP8_2 0.901 64.0         1         0      0
## 32:     10   AB8_1_TP8_2 0.901 64.0         1         0      0
## 33:     10   AB8_1_TP8_3 0.904 64.0         1         0      0
## 34:     10   AB8_1_TP8_3 0.904 64.0         1         0      0
## 35:      2  AB8_10_TP4_1 0.313 28.5         1         0      0
## 36:      2  AB8_10_TP4_1 0.313 28.5         1         0      0
## 37:      2  AB8_10_TP4_1 0.313 28.5         1         0      0
## 38:      5  AB8_10_TP4_2 0.288 28.5         1         0      0
## 39:      5  AB8_10_TP4_2 0.288 28.5         1         0      0
## 40:      5  AB8_10_TP4_2 0.288 28.5         1         0      0
## 41:      2  AB8_10_TP4_3 0.289 28.5         1         0      0
## 42:      2  AB8_10_TP4_3 0.289 28.5         1         0      0
## 43:      7  AB8_10_TP7_1 0.918 47.5         1         0      0
## 44:      7  AB8_10_TP7_1 0.918 47.5         1         0      0
## 45:      9  AB8_10_TP7_2 0.824 47.5         1         0      0
## 46:     11  AB8_10_TP8_1 0.913 64.0         1         0      0
## 47:     11  AB8_10_TP8_3 0.878 64.0         1         0      0
## 48:     11  AB8_10_TP8_3 0.878 64.0         1         0      0
## 49:      4   Val_0_TP4_2 0.130 28.5         1         0      0
## 50:      4   Val_0_TP4_2 0.130 28.5         1         0      0
## 51:      3   Val_0_TP4_3 0.096 28.5         1         0      0
## 52:      3   Val_0_TP4_3 0.096 28.5         1         0      0
## 53:      6   Val_0_TP7_1 0.249 47.5         1         0      0
## 54:      6   Val_0_TP7_1 0.249 47.5         1         0      0
## 55:      8   Val_0_TP7_2 0.260 47.5         1         0      0
## 56:      3   Val_0_TP7_3 0.176 47.5         1         0      0
## 57:      3   Val_0_TP7_3 0.176 47.5         1         0      0
## 58:      3   Val_0_TP7_3 0.176 47.5         1         0      0
## 59:      4   Val_1_TP4_2 0.179 28.5         1         0      0
## 60:      8   Val_1_TP7_2 0.689 47.5         1         0      0
## 61:      8   Val_1_TP7_2 0.689 47.5         1         0      0
## 62:      8   Val_1_TP7_3 0.778 47.5         1         0      0
## 63:      1  Val_10_TP4_1 0.174 28.5         1         0      0
## 64:      4  Val_10_TP4_2 0.193 28.5         1         0      0
## 65:      1  Val_10_TP4_3 0.204 28.5         1         0      0
## 66:      8  Val_10_TP7_2 0.765 47.5         1         0      0
## 67:      1     c_0_TP4_1    NA   NA         1         0      0
## 68:      4     c_0_TP4_2    NA   NA         1         0      0
## 69:      4     c_0_TP4_2    NA   NA         1         0      0
## 70:      4     c_0_TP4_2    NA   NA         1         0      0
## 71:      6     c_0_TP7_1    NA   NA         1         0      0
## 72:      8     c_0_TP7_2    NA   NA         1         0      0
## 73:      8     c_0_TP7_2    NA   NA         1         0      0
## 74:      1     c_1_TP4_1    NA   NA         1         0      0
## 75:      1     c_1_TP4_1    NA   NA         1         0      0
## 76:     10     c_1_TP8_2    NA   NA         1         0      0
## 77:      4    c_10_TP4_2    NA   NA         1         0      0
## 78:     11    c_10_TP8_2    NA   NA         1         0      0
## 79:     11    c_10_TP8_2    NA   NA         1         0      0
##     Column        Sample OD600 Time BelowLLOQ AboveULOQ BadS_N
met_data[,PresentButUnreliable:=ifelse(BelowLLOQ==1|BadS_N, 1, 0)]

met_data[TimePoint=="TP4", Time:=28.5]
met_data[TimePoint=="TP7", Time:=47.5]
met_data[TimePoint=="TP8", Time:=64]
met_data[,Time:=as.numeric(Time)]
met_data[,Strain:=factor(Strain, levels = c("c", "2243", "AB8", "Val"))]
levels(met_data$Strain) = c("Ctrl", "2243", "AB8n2", "Valencia")

met_data[,Acetate2:=factor(AcetateConc)]
levels(met_data$Acetate2) <- c("0mM Ac", "1mM Ac", "10mM Ac")
acetate_plot <- ggplot(met_data[Compound == "Acetate" & !is.na(Time)], aes(x = Time, y = mM-1.05, color = Strain)) + geom_point() + geom_line(aes(group = paste0(Strain, Replicate))) + facet_wrap(~HypAcetateConc, nrow = 1, scales = "free_y") + cowplot::theme_cowplot() + ylab("Concentration (mM)") + scale_color_manual(values = c("gray", brewer.pal(3, "Set1"))) + xlab("Time (hr)")

### Stats
acetate_data_sub <- met_data[Compound == "Acetate" & !is.na(Time)]
acetate_data_sub[,mMCorrected:=mM - 1.05]

acetate_data_sub[,Condition:=paste0(as.character(Acetate2), "_", Time)]

#### Separate by acetate group since the SE is so different between them
mod0ac <- lm(mMCorrected ~ factor(Time)*Strain, data = acetate_data_sub[Acetate2 == "0mM Ac"])
diffs0ac <- emmeans(mod0ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs0ac[,Acetate2:="0mM Ac"]

mod1ac <- lm(mMCorrected ~ factor(Time)*Strain, data = acetate_data_sub[Acetate2 == "1mM Ac"])
diffs1ac <- emmeans(mod1ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs1ac[,Acetate2:="1mM Ac"]

mod10ac <- lm(mMCorrected ~ factor(Time)*Strain, data = acetate_data_sub[Acetate2 == "10mM Ac"])
diffs10ac <- emmeans(mod10ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs10ac[,Acetate2:="10mM Ac"]
diffs_all <- rbind(diffs0ac, diffs1ac, diffs10ac)
diffs_all[,Strain:=gsub(" - Ctrl", "", contrast)]
diffs_all[,Signif:=case_when(
  p.value < 0.1 & p.value > 0.05 ~ ".",
  p.value < 0.05 & p.value > 0.01 ~ "*",
  p.value < 0.01 & p.value > 0.001 ~ "**", 
  p.value < 0.001 ~ "***", 
  TRUE ~ ""
)]
diffs_all[,ylev:=case_when(
  Acetate2 == "0mM Ac" ~ 0.5,
  Acetate2 == "1mM Ac" ~ 1.85,
  Acetate2 == "10mM Ac" ~ 18.5
)]
diffs_all[,ylev:=case_when(
  Strain == "2243" & Acetate2 == "1mM Ac" ~ ylev + 0.1,
  Strain == "Valencia" & Acetate2 == "1mM Ac" ~ ylev-0.1,
  TRUE ~ ylev
)]
diffs_all[,Acetate2:=factor(Acetate2, levels = c("0mM Ac", "1mM Ac", "10mM Ac"))]
ac_loq <- met_data[Compound=="Acetate", unique(LLOQ)]

## Plot data
acetate_means <- met_data[Compound == "Acetate" & !is.na(Time), list(mean(mM-1.05), sd(mM-1.05)/sqrt(length(mM))), by = list(Time, Strain, Acetate2)]
acetate_plot <- ggplot(acetate_means, aes(x = Time, y = V1, color = Strain)) + 
  geom_hline(yintercept = ac_loq, linetype = 2) +
  geom_line(aes(group = Strain)) + geom_point() + 
  geom_errorbar(aes(ymin = V1-V2, ymax = V1+V2), width = 0)+ 
  geom_text(data = diffs_all, aes(y = ylev, x = Time, label = Signif)) + scale_y_continuous(limits=c(0, NA), expand = expansion(mult = c(0, 0.045))) + 
  theme(strip.background = element_blank()) + facet_wrap(~Acetate2, nrow = 1, scales = "free_y") + 
  ylab("Concentration (mM)") + scale_x_continuous(expand = expansion(add = c(4, 4))) +
  scale_color_manual(values = c("gray", brewer.pal(3, "Set1"))) + xlab("Time (hr)") 

ggsave(acetate_plot, file = paste0(outdir, "targeted_acetate_quant.pdf"), width = 6, height = 2)

acetate_plot

#### WE will separate by acetate group since the SE is so different between them
arg_data_sub <- met_data[Compound == "Arg" & !is.na(Time)]
mod0ac <- lm(mM ~ factor(Time)*Strain, data = arg_data_sub[Acetate2 == "0mM Ac"])
diffs0ac <- emmeans(mod0ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs0ac[,Acetate2:="0mM Ac"]

mod1ac <- lm(mM ~ factor(Time)*Strain, data = arg_data_sub[Acetate2 == "1mM Ac"])
diffs1ac <- emmeans(mod1ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs1ac[,Acetate2:="1mM Ac"]

mod10ac <- lm(mM ~ factor(Time)*Strain, data = arg_data_sub[Acetate2 == "10mM Ac"])
diffs10ac <- emmeans(mod10ac, specs = ~ Strain | Time, contr = "dunnett")$contrasts %>% as.data.frame %>% data.table()
diffs10ac[,Acetate2:="10mM Ac"]
diffs_all <- rbind(diffs0ac, diffs1ac, diffs10ac)
diffs_all[,Strain:=gsub(" - Ctrl", "", contrast)]
diffs_all[,Signif:=case_when(
  p.value < 0.1 & p.value > 0.05 ~ ".",
  p.value < 0.05 & p.value > 0.01 ~ "*",
  p.value < 0.01 & p.value > 0.001 ~ "**", 
  p.value < 0.001 ~ "***", 
  TRUE ~ ""
)]
diffs_all[,ylev:=case_when(
  Acetate2 == "0mM Ac" ~ 72,
  Acetate2 == "1mM Ac" ~ 72,
  Acetate2 == "10mM Ac" ~ 72
)]
diffs_all[,ylev:=case_when(
  Strain == "2243" & Acetate2 == "1mM Ac" ~ ylev + 0.1,
  Strain == "Valencia" & Acetate2 == "1mM Ac" ~ ylev-0.1,
  TRUE ~ ylev
)]
diffs_all[,Acetate2:=factor(Acetate2, levels = c("0mM Ac", "1mM Ac", "10mM Ac"))]
arg_means <- met_data[Compound == "Arg" & !is.na(Time), list(mean(mM), sd(mM)/sqrt(length(mM))), by = list(Time, Strain, Acetate2)]
arg_plot <- ggplot(arg_means, aes(x = Time, y = V1, color = Strain)) + geom_hline(yintercept = 0, linetype = 2) + 
  geom_line(aes(group = Strain)) + geom_point() + 
  geom_text(data = diffs_all, aes(y = ylev, x = Time, label = Signif)) + scale_y_continuous(limits=c(0, NA), expand = expansion(mult = c(0, 0.045))) + 
  geom_errorbar(aes(ymin = V1-V2, ymax = V1+V2), width = 0)+ 
  scale_y_continuous(limits=c(0, NA), expand = expansion(mult = c(0, 0.045))) + 
  theme(strip.background = element_blank()) + facet_wrap(~Acetate2, nrow = 1) + 
  ylab("Concentration of arginine (mM)") + scale_x_continuous(expand = expansion(add = c(4, 4))) +
  scale_color_manual(values = c("gray", brewer.pal(3, "Set1"))) + xlab("Time (hr)") 
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
ggsave(arg_plot, file = paste0(outdir, "targeted_arg_plot.pdf"), width = 6, height = 2)

arg_plot

7 Figure S5 - Arginine SIRM

rm(list = ls())
library(data.table)
library(tidyverse)
library(ggpubr)
outdir <- "figure_s6/"
dir.create(outdir)
theme_set(theme_classic2())

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] emmeans_1.7.5         paletteer_1.4.0       santaR_1.2.3         
##  [4] lubridate_1.8.0       ggbeeswarm_0.6.0      growthcurver_0.3.1   
##  [7] KEGGREST_1.32.0       patchwork_1.1.1       ggrepel_0.9.1        
## [10] ComplexHeatmap_2.13.1 ggpubr_0.4.0          RColorBrewer_1.1-2   
## [13] fastcluster_1.2.3     vegan_2.6-2           lattice_0.20-45      
## [16] permute_0.9-5         webchem_1.1.2         forcats_0.5.1        
## [19] stringr_1.4.0         dplyr_1.0.7           purrr_0.3.4          
## [22] readr_2.1.2           tidyr_1.2.0           tibble_3.1.7         
## [25] tidyverse_1.3.1       readxl_1.3.1          cowplot_1.1.1        
## [28] ggplot2_3.3.6         data.table_1.14.2    
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.2.1        circlize_0.4.15        splines_4.1.1         
##   [4] GenomeInfoDb_1.30.1    digest_0.6.29          foreach_1.5.2         
##   [7] htmltools_0.5.2        magick_2.7.3           fansi_1.0.3           
##  [10] magrittr_2.0.3         cluster_2.1.3          doParallel_1.0.17     
##  [13] tzdb_0.3.0             Biostrings_2.62.0      modelr_0.1.8          
##  [16] matrixStats_0.62.0     colorspace_2.0-3       rvest_1.0.2           
##  [19] haven_2.5.0            xfun_0.31              crayon_1.4.1          
##  [22] RCurl_1.98-1.7         prismatic_1.1.0        jsonlite_1.8.0        
##  [25] iterators_1.0.13       glue_1.6.2             gtable_0.3.0          
##  [28] zlibbioc_1.40.0        XVector_0.34.0         GetoptLong_1.0.5      
##  [31] car_3.0-13             shape_1.4.6            BiocGenerics_0.40.0   
##  [34] abind_1.4-5            scales_1.1.1           mvtnorm_1.1-3         
##  [37] DBI_1.1.1              data.tree_1.0.0        rstatix_0.7.0         
##  [40] Rcpp_1.0.8.3           xtable_1.8-4           clue_0.3-61           
##  [43] stats4_4.1.1           httr_1.4.2             ellipsis_0.3.2        
##  [46] pkgconfig_2.0.3        farver_2.1.0           sass_0.4.1            
##  [49] dbplyr_2.1.1           utf8_1.2.2             tidyselect_1.1.2      
##  [52] labeling_0.4.2         rlang_1.0.2            later_1.3.0           
##  [55] munsell_0.5.0          cellranger_1.1.0       tools_4.1.1           
##  [58] cli_3.3.0              generics_0.1.0         broom_0.8.0           
##  [61] evaluate_0.15          fastmap_1.1.0          yaml_2.3.5            
##  [64] rematch2_2.1.2         knitr_1.39             fs_1.5.0              
##  [67] nlme_3.1-159           mime_0.12              xml2_1.3.2            
##  [70] compiler_4.1.1         rstudioapi_0.13        beeswarm_0.4.0        
##  [73] png_0.1-7              ggsignif_0.6.3         reprex_2.0.1          
##  [76] bslib_0.3.1            stringi_1.7.6          highr_0.9             
##  [79] Matrix_1.4-1           vctrs_0.4.1            pillar_1.7.0          
##  [82] lifecycle_1.0.0        jquerylib_0.1.4        GlobalOptions_0.1.2   
##  [85] estimability_1.4       bitops_1.0-7           httpuv_1.6.5          
##  [88] R6_2.5.1               promises_1.2.0.1       vipor_0.4.5           
##  [91] IRanges_2.28.0         codetools_0.2-18       MASS_7.3-57           
##  [94] assertthat_0.2.1       rjson_0.2.21           minpack.lm_1.2-1      
##  [97] withr_2.5.0            S4Vectors_0.32.4       GenomeInfoDbData_1.2.7
## [100] mgcv_1.8-40            parallel_4.1.1         hms_1.1.1             
## [103] coda_0.19-4            rmarkdown_2.14         carData_3.0-5         
## [106] Cairo_1.6-0            shiny_1.6.0
datadir <- "../SILArginine2022-03/"
hilic_intra <- readRDS(paste0(datadir, "SIL_Intra_HILIC_LowLevelDataAllProcessed.rds"))

labeled_comps <- hilic_intra[,sum(AreaFrac), by=list(CompID, NumLabeledC)][NumLabeledC > 0 & V1 > 1e5, unique(CompID)]

frac_labeling <- hilic_intra[ArgGroup=="HR" & Strain != "c",list(sum(AreaFrac[NumLabeledC==0]), sum(AreaFrac[NumLabeledC != 0])), by=CompID]
frac_labeling[,LabelFrac:=V2/(V2+V1)]

median_dat <- hilic_intra[,list(median(AreaFrac), mean(AreaFrac), sd(AreaFrac)/sqrt(length(AreaFrac))), by = list(CompID, FeatID, Name, Formula, MSI, NumLabeledC, ArgGroup, Strain, TimePoint, Time)]
most_abund_iso <- median_dat[ArgGroup == "HR" & Strain == "2243",list(NumLabeledC[which.max(V2)], max(V2)), by = list(CompID, Formula)]
most_abund_iso[order(V1, decreasing = T)][1:20]
##                                                    CompID          Formula V1
##  1:                   348.22374_9.438_Unknown_Unknown_Pos    C12 H28 N8 O4 12
##  2:                   372.17373_8.808_Unknown_Unknown_Pos   C13 H28 N2 O10 12
##  3:   350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos    C12 H26 N6 O6 12
##  4:                     404.11103_8.8_Unknown_Unknown_Pos C12 H26 N2 O9 P2 11
##  5:                   264.18017_9.581_Unknown_Unknown_Pos    C10 H24 N4 O4 10
##  6:   217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos     C7 H15 N5 O3  7
##  7:                   216.12228_8.032_Unknown_Unknown_Pos     C8 H16 N4 O3  6
##  8:                   202.10682_8.214_Unknown_Unknown_Pos     C7 H14 N4 O3  6
##  9:                   174.11167_9.894_Unknown_Unknown_Pos     C6 H14 N4 O2  6
## 10:           174.11163_9.439_DL-Arginine_DL-Arginine_Neg     C6 H14 N4 O2  6
## 11:           174.11167_9.439_DL-Arginine_DL-Arginine_Pos     C6 H14 N4 O2  6
## 12:   158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos     C6 H10 N2 O3  6
## 13: 175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos     C6 H13 N3 O3  6
## 14:   111.07977_5.659_Labeled Unknown_Labeled Unknown_Pos         C5 H9 N3  5
## 15:                    129.09041_9.44_Unknown_Unknown_Pos      C5 H11 N3 O  5
## 16:                   132.08991_9.439_Unknown_Unknown_Neg     C5 H12 N2 O2  5
## 17:                   173.11665_9.234_Unknown_Unknown_Pos     C7 H15 N3 O2  5
## 18:   174.10067_8.188_Labeled Unknown_Labeled Unknown_Pos     C7 H14 N2 O3  5
## 19:   129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos      C5 H11 N3 O  5
## 20:                    216.14758_8.58_Unknown_Unknown_Pos    C10 H20 N2 O3  5
##               V2
##  1:   32671283.1
##  2:    7334523.1
##  3:   74638415.9
##  4:     973554.1
##  5:   13346435.7
##  6:   16719255.0
##  7:    1611326.5
##  8:    2620802.6
##  9:   15436774.7
## 10:  169671769.7
## 11: 2433432201.2
## 12:  603774145.5
## 13: 1814734154.5
## 14:    6258371.8
## 15:    8208446.8
## 16:   10903381.0
## 17:    2186815.1
## 18:    2719810.6
## 19:   61488051.2
## 20:    5357241.5
most_abund_iso[V1 != 0 & grepl("Pos", CompID)][order(V2, decreasing = T)][1:30]
##                                                      CompID        Formula V1
##  1:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos   C6 H14 N4 O2  6
##  2:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos   C6 H13 N3 O3  6
##  3:        132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos   C5 H12 N2 O2  5
##  4:     115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos     C5 H9 N O2  5
##  5:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos   C6 H10 N2 O3  6
##  6:             114.07955_9.582_Prolinamide_Prolinamide_Pos    C5 H10 N2 O  5
##  7:     350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos  C12 H26 N6 O6 12
##  8: 174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos   C7 H14 N2 O3  5
##  9:     129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos    C5 H11 N3 O  5
## 10:      69.05802_9.583_Labeled Unknown_Labeled Unknown_Pos        C4 H7 N  4
## 11:     132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos   C5 H12 N2 O2  5
## 12:     156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos   C7 H12 N2 O2  5
## 13:     114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos    C5 H10 N2 O  5
## 14:                     348.22374_9.438_Unknown_Unknown_Pos  C12 H28 N8 O4 12
## 15:      112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos     C5 H8 N2 O  5
## 16:     217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos   C7 H15 N5 O3  7
## 17:                     174.11167_9.894_Unknown_Unknown_Pos   C6 H14 N4 O2  6
## 18:                      69.05802_8.774_Unknown_Unknown_Pos        C4 H7 N  4
## 19:                     264.18017_9.581_Unknown_Unknown_Pos  C10 H24 N4 O4 10
## 20:       115.0747_6.17_Labeled Unknown_Labeled Unknown_Pos     C4 H9 N3 O  4
## 21:                      236.1528_8.727_Unknown_Unknown_Pos  C13 H20 N2 O2  5
## 22:                      129.09041_9.44_Unknown_Unknown_Pos    C5 H11 N3 O  5
## 23:                     132.09003_8.769_Unknown_Unknown_Pos   C5 H12 N2 O2  5
## 24:                     372.17373_8.808_Unknown_Unknown_Pos C13 H28 N2 O10 12
## 25:                     113.04785_8.765_Unknown_Unknown_Pos     C5 H7 N O2  5
## 26:     111.07977_5.659_Labeled Unknown_Labeled Unknown_Pos       C5 H9 N3  5
## 27:                      69.05801_5.659_Unknown_Unknown_Pos        C4 H7 N  4
## 28:                      216.14758_8.58_Unknown_Unknown_Pos  C10 H20 N2 O3  5
## 29:                       96.0689_9.583_Unknown_Unknown_Pos       C5 H8 N2  5
## 30:                      85.05285_4.196_Unknown_Unknown_Pos      C4 H7 N O  4
##                                                      CompID        Formula V1
##             V2
##  1: 2433432201
##  2: 1814734155
##  3: 1516007668
##  4:  956660206
##  5:  603774146
##  6:  308427577
##  7:   74638416
##  8:   65372136
##  9:   61488051
## 10:   48550460
## 11:   46276921
## 12:   44160266
## 13:   34578251
## 14:   32671283
## 15:   30532688
## 16:   16719255
## 17:   15436775
## 18:   14340714
## 19:   13346436
## 20:   11673893
## 21:    9470794
## 22:    8208447
## 23:    7359230
## 24:    7334523
## 25:    6346959
## 26:    6258372
## 27:    5999499
## 28:    5357242
## 29:    4691248
## 30:    3524013
##             V2
most_abund_iso[V1 != 0 & grepl("Neg", CompID)][order(V2, decreasing = T)][1:20]
##                                                                  CompID
##  1:                         174.11163_9.439_DL-Arginine_DL-Arginine_Neg
##  2:                  132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg
##  3:                             132.08991_9.579_Ornithine_Ornithine_Neg
##  4:                                 132.08991_9.439_Unknown_Unknown_Neg
##  5: 607.08266_9.886_UDP-N-acetylglucosamine_UDP-N-acetylglucosamine_Neg
##  6:                                                                <NA>
##  7:                                                                <NA>
##  8:                                                                <NA>
##  9:                                                                <NA>
## 10:                                                                <NA>
## 11:                                                                <NA>
## 12:                                                                <NA>
## 13:                                                                <NA>
## 14:                                                                <NA>
## 15:                                                                <NA>
## 16:                                                                <NA>
## 17:                                                                <NA>
## 18:                                                                <NA>
## 19:                                                                <NA>
## 20:                                                                <NA>
##               Formula V1          V2
##  1:      C6 H14 N4 O2  6 169671769.7
##  2:      C5 H12 N2 O2  5  62003057.5
##  3:      C5 H12 N2 O2  5  20662039.7
##  4:      C5 H12 N2 O2  5  10903381.0
##  5: C17 H27 N3 O17 P2  1    222146.8
##  6:              <NA> NA          NA
##  7:              <NA> NA          NA
##  8:              <NA> NA          NA
##  9:              <NA> NA          NA
## 10:              <NA> NA          NA
## 11:              <NA> NA          NA
## 12:              <NA> NA          NA
## 13:              <NA> NA          NA
## 14:              <NA> NA          NA
## 15:              <NA> NA          NA
## 16:              <NA> NA          NA
## 17:              <NA> NA          NA
## 18:              <NA> NA          NA
## 19:              <NA> NA          NA
## 20:              <NA> NA          NA
median_dat[NumLabeledC > 4 & ArgGroup == "HR" & Strain == "2243" & V2 > 1e7][order(V2, decreasing = T)]
##                                                      CompID
##  1:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos
##  2:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos
##  3:        132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos
##  4:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos
##  5:        132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos
##  6:     115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos
##  7:     115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos
##  8:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos
##  9:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos
## 10:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos
## 11:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos
## 12:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos
## 13:             114.07955_9.582_Prolinamide_Prolinamide_Pos
## 14:         115.06342_7.701_D-(+)-Proline_D-(+)-Proline_Pos
## 15:             114.07955_9.582_Prolinamide_Prolinamide_Pos
## 16:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos
## 17:             174.11163_9.439_DL-Arginine_DL-Arginine_Neg
## 18:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos
## 19:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos
## 20:     350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 21: 174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos
## 22:      132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg
## 23:     129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos
## 24:      132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg
## 25:     132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos
## 26:     156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos
## 27:     350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 28:     156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos
## 29:     350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 30:     114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos
## 31:                     348.22374_9.438_Unknown_Unknown_Pos
## 32:      112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos
## 33:     350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 34:     114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos
## 35:     132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos
## 36:             174.11163_9.439_DL-Arginine_DL-Arginine_Neg
## 37:                 132.08991_9.579_Ornithine_Ornithine_Neg
## 38:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos
## 39:      112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos
## 40:     217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos
## 41:                     174.11167_9.894_Unknown_Unknown_Pos
## 42:                     264.18017_9.581_Unknown_Unknown_Pos
## 43:     350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos
## 44:                     132.08991_9.439_Unknown_Unknown_Neg
##                                                      CompID
##                                                              FeatID
##  1:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos_ExRate6
##  2:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos_ExRate6
##  3:        132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos_ExRate5
##  4:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos_ExRate6
##  5:        132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos_ExRate5
##  6:     115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos_ExRate5
##  7:     115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos_ExRate5
##  8:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate6
##  9:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos_ExRate5
## 10:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate6
## 11:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos_ExRate6
## 12:   175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos_ExRate5
## 13:             114.07955_9.582_Prolinamide_Prolinamide_Pos_ExRate5
## 14:         115.06342_7.701_D-(+)-Proline_D-(+)-Proline_Pos_ExRate5
## 15:             114.07955_9.582_Prolinamide_Prolinamide_Pos_ExRate5
## 16:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 17:             174.11163_9.439_DL-Arginine_DL-Arginine_Neg_ExRate6
## 18:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos_ExRate5
## 19:     158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 20:    350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate12
## 21: 174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos_ExRate5
## 22:      132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg_ExRate5
## 23:     129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 24:      132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg_ExRate5
## 25:     132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 26:     156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 27:    350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate11
## 28:     156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 29:    350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate12
## 30:     114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 31:                    348.22374_9.438_Unknown_Unknown_Pos_ExRate12
## 32:      112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 33:    350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate11
## 34:     114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 35:     132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 36:             174.11163_9.439_DL-Arginine_DL-Arginine_Neg_ExRate6
## 37:                 132.08991_9.579_Ornithine_Ornithine_Neg_ExRate5
## 38:             174.11167_9.439_DL-Arginine_DL-Arginine_Pos_ExRate5
## 39:      112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos_ExRate5
## 40:     217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos_ExRate7
## 41:                     174.11167_9.894_Unknown_Unknown_Pos_ExRate6
## 42:                    264.18017_9.581_Unknown_Unknown_Pos_ExRate10
## 43:    350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos_ExRate10
## 44:                     132.08991_9.439_Unknown_Unknown_Neg_ExRate5
##                                                              FeatID
##                  Name       Formula           MSI NumLabeledC ArgGroup Strain
##  1:       DL-Arginine  C6 H14 N4 O2 MSI_1;Labeled           6       HR   2243
##  2:  L-(+)-Citrulline  C6 H13 N3 O3 MSI_1;Labeled           6       HR   2243
##  3:    L(+)-Ornithine  C5 H12 N2 O2 MSI_1;Labeled           5       HR   2243
##  4:  L-(+)-Citrulline  C6 H13 N3 O3 MSI_1;Labeled           6       HR   2243
##  5:    L(+)-Ornithine  C5 H12 N2 O2 MSI_1;Labeled           5       HR   2243
##  6:   Labeled Unknown    C5 H9 N O2       Labeled           5       HR   2243
##  7:   Labeled Unknown    C5 H9 N O2       Labeled           5       HR   2243
##  8:   Labeled Unknown  C6 H10 N2 O3       Labeled           6       HR   2243
##  9:  L-(+)-Citrulline  C6 H13 N3 O3 MSI_1;Labeled           5       HR   2243
## 10:   Labeled Unknown  C6 H10 N2 O3       Labeled           6       HR   2243
## 11:       DL-Arginine  C6 H14 N4 O2 MSI_1;Labeled           6       HR   2243
## 12:  L-(+)-Citrulline  C6 H13 N3 O3 MSI_1;Labeled           5       HR   2243
## 13:       Prolinamide   C5 H10 N2 O MSI_3;Labeled           5       HR   2243
## 14:     D-(+)-Proline    C5 H9 N O2 MSI_1;Labeled           5       HR   2243
## 15:       Prolinamide   C5 H10 N2 O MSI_3;Labeled           5       HR   2243
## 16:   Labeled Unknown  C6 H10 N2 O3       Labeled           5       HR   2243
## 17:       DL-Arginine  C6 H14 N4 O2 MSI_1;Labeled           6       HR   2243
## 18:       DL-Arginine  C6 H14 N4 O2 MSI_1;Labeled           5       HR   2243
## 19:   Labeled Unknown  C6 H10 N2 O3       Labeled           5       HR   2243
## 20:   Labeled Unknown C12 H26 N6 O6       Labeled          12       HR   2243
## 21: N-Acetylornithine  C7 H14 N2 O3 MSI_3;Labeled           5       HR   2243
## 22:   Labeled Unknown  C5 H12 N2 O2       Labeled           5       HR   2243
## 23:   Labeled Unknown   C5 H11 N3 O       Labeled           5       HR   2243
## 24:   Labeled Unknown  C5 H12 N2 O2       Labeled           5       HR   2243
## 25:   Labeled Unknown  C5 H12 N2 O2       Labeled           5       HR   2243
## 26:   Labeled Unknown  C7 H12 N2 O2       Labeled           5       HR   2243
## 27:   Labeled Unknown C12 H26 N6 O6       Labeled          11       HR   2243
## 28:   Labeled Unknown  C7 H12 N2 O2       Labeled           5       HR   2243
## 29:   Labeled Unknown C12 H26 N6 O6       Labeled          12       HR   2243
## 30:   Labeled Unknown   C5 H10 N2 O       Labeled           5       HR   2243
## 31:           Unknown C12 H28 N8 O4     UnknownNA          12       HR   2243
## 32:   Labeled Unknown    C5 H8 N2 O       Labeled           5       HR   2243
## 33:   Labeled Unknown C12 H26 N6 O6       Labeled          11       HR   2243
## 34:   Labeled Unknown   C5 H10 N2 O       Labeled           5       HR   2243
## 35:   Labeled Unknown  C5 H12 N2 O2       Labeled           5       HR   2243
## 36:       DL-Arginine  C6 H14 N4 O2 MSI_1;Labeled           6       HR   2243
## 37:         Ornithine  C5 H12 N2 O2 MSI_1:Labeled           5       HR   2243
## 38:       DL-Arginine  C6 H14 N4 O2 MSI_1;Labeled           5       HR   2243
## 39:   Labeled Unknown    C5 H8 N2 O       Labeled           5       HR   2243
## 40:   Labeled Unknown  C7 H15 N5 O3       Labeled           7       HR   2243
## 41:           Unknown  C6 H14 N4 O2     UnknownNA           6       HR   2243
## 42:           Unknown C10 H24 N4 O4     UnknownNA          10       HR   2243
## 43:   Labeled Unknown C12 H26 N6 O6       Labeled          10       HR   2243
## 44:           Unknown  C5 H12 N2 O2     UnknownNA           5       HR   2243
##                  Name       Formula           MSI NumLabeledC ArgGroup Strain
##     TimePoint Time         V1         V2        V3
##  1:       TP3   44 2800397763 2433432201 382702639
##  2:       TP3   44 1767954825 1814734155 184582720
##  3:       TP4   56 1656425405 1516007668 179889527
##  4:       TP4   56 1197196415 1186464150 236262499
##  5:       TP3   44 1133172993 1133038728  15864483
##  6:       TP4   56 1062857832  956660206 110799577
##  7:       TP3   44  763543885  783875160  23366967
##  8:       TP3   44  583347351  603774146  60067528
##  9:       TP4   56  575143733  595977430  69713210
## 10:       TP4   56  424226146  400286091  70028594
## 11:       TP4   56   68687100  352221281 317903396
## 12:       TP3   44  277247672  340676057  73422594
## 13:       TP4   56  337860170  308427577  37518663
## 14:       TP4   56  243836327  282383359 145379078
## 15:       TP3   44  243401774  244825242   1666906
## 16:       TP4   56  209903719  209629445  22842592
## 17:       TP3   44  164824316  169671770  44107138
## 18:       TP3   44  142547048  125117750  19568646
## 19:       TP3   44   95848036  116459815  25916026
## 20:       TP3   44   74413926   74638416   4029161
## 21:       TP4   56   75060415   65372136  13501343
## 22:       TP3   44   64425396   62003058   5398596
## 23:       TP3   44   58308097   61488051  21332380
## 24:       TP4   56   44955647   50784230  10983747
## 25:       TP4   56   47806664   46276921   2859598
## 26:       TP4   56   41921306   44160266   2747735
## 27:       TP4   56   36649678   37475301   1421078
## 28:       TP3   44   39130481   37117172   2365158
## 29:       TP4   56   38225076   36117309   8146416
## 30:       TP4   56   35579425   34578251   4065184
## 31:       TP3   44   36248167   32671283  10732100
## 32:       TP3   44   32405282   30532688   5361084
## 33:       TP3   44   25407180   27119842   3566237
## 34:       TP3   44   20843708   21717832   2105291
## 35:       TP3   44   21409686   21704627   1990498
## 36:       TP4   56    9414202   21368153  16469454
## 37:       TP3   44   21716404   20662040   1872655
## 38:       TP4   56    3104331   19765075  18234943
## 39:       TP4   56   20374372   19551392   4619979
## 40:       TP3   44   15444557   16719255   5788029
## 41:       TP3   44   16553992   15436775   1197296
## 42:       TP4   56   14672438   13346436   1569095
## 43:       TP4   56   10501751   12445262   3346754
## 44:       TP3   44    9098434   10903381   4081690
##     TimePoint Time         V1         V2        V3
top_labeled_comps <- frac_labeling[V2 > 1e5 & LabelFrac > 0.1, CompID]
top_labeled_comps2<- frac_labeling[V2 > 1e7 & V1+V2 > 5e7 & LabelFrac > 0.1, CompID]


## How many features have >5 labeled carbons? 
unique(hilic_intra[NumLabeledC > 5 & Strain == "2243" & AreaFrac > 1e6 & ArgGroup == "HR" & !grepl("rginine", Name) & 
                     !grepl("itrulline", Name) & !grepl("rnithine", Name) & TimePoint=="TP4",
                   list(NumLabeledC, Name, Formula, CompID)])[,unique(CompID)]
##  [1] "404.11103_8.8_Unknown_Unknown_Pos"                  
##  [2] "135.10497_1.603_Unknown_Unknown_Pos"                
##  [3] "348.22374_9.438_Unknown_Unknown_Pos"                
##  [4] "171.12614_1.113_Unknown_Unknown_Pos"                
##  [5] "174.11167_9.894_Unknown_Unknown_Pos"                
##  [6] "217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos"
##  [7] "372.17373_8.808_Unknown_Unknown_Pos"                
##  [8] "264.18017_9.581_Unknown_Unknown_Pos"                
##  [9] "350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos"
## [10] "155.06964_9.527_Unknown_Unknown_Pos"                
## [11] "158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos"
unique(hilic_intra[NumLabeledC >= 5 & Strain == "2243" & AreaFrac > 1e6 & ArgGroup == "HR"  & TimePoint=="TP4",
                   list(NumLabeledC, Name, Formula, CompID)])[,unique(CompID)]
##  [1] "173.11665_9.234_Unknown_Unknown_Pos"                    
##  [2] "174.10067_8.188_Labeled Unknown_Labeled Unknown_Pos"    
##  [3] "129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos"    
##  [4] "404.11103_8.8_Unknown_Unknown_Pos"                      
##  [5] "216.14758_8.58_Unknown_Unknown_Pos"                     
##  [6] "135.10497_1.603_Unknown_Unknown_Pos"                    
##  [7] "348.22374_9.438_Unknown_Unknown_Pos"                    
##  [8] "129.09041_9.44_Unknown_Unknown_Pos"                     
##  [9] "132.08991_9.439_Unknown_Unknown_Neg"                    
## [10] "171.12614_1.113_Unknown_Unknown_Pos"                    
## [11] "96.0689_9.583_Unknown_Unknown_Pos"                      
## [12] "174.11167_9.894_Unknown_Unknown_Pos"                    
## [13] "227.01498_7.695_Unknown_Unknown_Pos"                    
## [14] "113.04785_8.765_Unknown_Unknown_Pos"                    
## [15] "132.09003_8.769_Unknown_Unknown_Pos"                    
## [16] "236.1528_8.727_Unknown_Unknown_Pos"                     
## [17] "132.08991_9.579_Ornithine_Ornithine_Neg"                
## [18] "217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos"    
## [19] "174.11163_9.439_DL-Arginine_DL-Arginine_Neg"            
## [20] "372.17373_8.808_Unknown_Unknown_Pos"                    
## [21] "264.18017_9.581_Unknown_Unknown_Pos"                    
## [22] "174.11167_9.439_DL-Arginine_DL-Arginine_Pos"            
## [23] "112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos"     
## [24] "114.07956_7.146_Labeled Unknown_Labeled Unknown_Pos"    
## [25] "132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg"     
## [26] "174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos"
## [27] "132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos"    
## [28] "156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos"    
## [29] "350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos"    
## [30] "155.06964_9.527_Unknown_Unknown_Pos"                    
## [31] "114.07955_9.582_Prolinamide_Prolinamide_Pos"            
## [32] "158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos"    
## [33] "115.06342_9.583_Labeled Unknown_Labeled Unknown_Pos"    
## [34] "132.08999_9.58_L(+)-Ornithine_L(+)-Ornithine_Pos"       
## [35] "115.06342_7.701_D-(+)-Proline_D-(+)-Proline_Pos"        
## [36] "175.09578_8.766_L-(+)-Citrulline_L-(+)-Citrulline_Pos"
bad_comps <- hilic_intra[NumLabeledC >= 5 & AreaFrac > 1e6& ArgGroup != "HR" & value > 5,
                         list(NumLabeledC, Name, Formula, CompID)][,unique(CompID)]

unique(hilic_intra[NumLabeledC >= 5 & Strain == "2243" & AreaFrac > 1e6 & ArgGroup == "HR"  & TimePoint=="TP4" & !CompID %in% bad_comps,
                   list(NumLabeledC, Name, Formula, CompID)])[,unique(CompID)]
##  [1] "173.11665_9.234_Unknown_Unknown_Pos"                    
##  [2] "174.10067_8.188_Labeled Unknown_Labeled Unknown_Pos"    
##  [3] "129.09037_5.657_Labeled Unknown_Labeled Unknown_Pos"    
##  [4] "404.11103_8.8_Unknown_Unknown_Pos"                      
##  [5] "216.14758_8.58_Unknown_Unknown_Pos"                     
##  [6] "348.22374_9.438_Unknown_Unknown_Pos"                    
##  [7] "129.09041_9.44_Unknown_Unknown_Pos"                     
##  [8] "132.08991_9.439_Unknown_Unknown_Neg"                    
##  [9] "96.0689_9.583_Unknown_Unknown_Pos"                      
## [10] "174.11167_9.894_Unknown_Unknown_Pos"                    
## [11] "227.01498_7.695_Unknown_Unknown_Pos"                    
## [12] "113.04785_8.765_Unknown_Unknown_Pos"                    
## [13] "132.09003_8.769_Unknown_Unknown_Pos"                    
## [14] "236.1528_8.727_Unknown_Unknown_Pos"                     
## [15] "132.08991_9.579_Ornithine_Ornithine_Neg"                
## [16] "217.11769_8.519_Labeled Unknown_Labeled Unknown_Pos"    
## [17] "174.11163_9.439_DL-Arginine_DL-Arginine_Neg"            
## [18] "372.17373_8.808_Unknown_Unknown_Pos"                    
## [19] "264.18017_9.581_Unknown_Unknown_Pos"                    
## [20] "174.11167_9.439_DL-Arginine_DL-Arginine_Pos"            
## [21] "112.0638_8.766_Labeled Unknown_Labeled Unknown_Pos"     
## [22] "132.0899_8.767_Labeled Unknown_Labeled Unknown_Neg"     
## [23] "174.10069_8.327_N-Acetylornithine_N-Acetylornithine_Pos"
## [24] "132.08997_9.897_Labeled Unknown_Labeled Unknown_Pos"    
## [25] "156.09008_7.382_Labeled Unknown_Labeled Unknown_Pos"    
## [26] "350.19186_8.767_Labeled Unknown_Labeled Unknown_Pos"    
## [27] "155.06964_9.527_Unknown_Unknown_Pos"                    
## [28] "158.06929_8.766_Labeled Unknown_Labeled Unknown_Pos"    
## [29] "115.06342_7.701_D-(+)-Proline_D-(+)-Proline_Pos"
hilic_intra[!CompID %in% bad_comps, length(unique(CompID))]
## [1] 234
feat_data_wide <- dcast(median_dat[ArgGroup == "HR"], FeatID+CompID+Formula+Name+NumLabeledC~Strain+TimePoint, value.var = "V1")
max_vals <- hilic_intra[,.(MaxVal = max(AreaFrac)), by = FeatID]
feat_data_wide <- merge(feat_data_wide, max_vals, by = "FeatID", all.x = T)

new_color_scale <- c("gray", brewer.pal(9, "YlGnBu"))
median_dat[,NumLabeledC_v2:=ifelse(NumLabeledC >= 9, "9 or more", NumLabeledC)]

tot_vals <- feat_data_wide[,list(sum(`2243_TP4`), sum(c_TP3)), by = CompID]
tot_vals[,log2FC:=log2((V2+1000)/(V1+1000))]

comp_order <- median_dat[V1 > 1e5 & Strain == "2243", list(max(NumLabeledC), V1[which.max(NumLabeledC)]), by = CompID][order(V1, V2, decreasing = T), CompID]

top_id_comps <- top_labeled_comps[!grepl("known", top_labeled_comps)]
top_id_comps <- sort(top_id_comps)[c(1:4,6, 7, 8, 9, 10, 11, 13, 14, 15:18)]
median_dat[,NumLabeledC2:=paste0("M+", NumLabeledC_v2)]

comp_order2 <- median_dat[V1 > 1e5 & Strain == "2243" & CompID %in% top_id_comps, list(max(NumLabeledC), V1[which.max(NumLabeledC)]), by = Name][order(V1, V2, decreasing = T), Name]

hilic_intra_labels <- ggplot(median_dat[ArgGroup == "HR" & Strain == "2243" & CompID %in% top_id_comps & !grepl("known", Name)], 
                             aes(x = factor(Time), y = V2, fill = factor(NumLabeledC2))) + geom_bar(stat = "identity", position = "fill", color = "black") + facet_wrap(~factor(Name, levels = comp_order2), ncol = 4, scales = "free_y") + scale_fill_manual(values = new_color_scale, name = "Labeling Pattern") + theme(axis.text = element_text(size = 9), strip.text = element_text(size = 9)) + xlab("Time (hr)") + ylab("Share of Peak Area") + theme(strip.background = element_blank())

ggsave(hilic_intra_labels, file = paste0(outdir, "hilic_intra_arg_topIDcomps_sub.pdf"), width = 6, height = 4.5)

hilic_intra_labels

############# Supp table
tab_save <- median_dat[(CompID %in% top_labeled_comps) & Strain != "c" & ArgGroup == "HR"][V1 != 0 & V2 != 0]
setnames(tab_save, c("V2", "V3"), c("MeanPeakArea", "SEPeakArea"))
tab_save[,MID:=MeanPeakArea/sum(MeanPeakArea), by = list(CompID, Strain, ArgGroup)]
good_isos <- tab_save[,sum(MeanPeakArea), by=list(CompID, NumLabeledC)][V1 > 50000]

tab_save <- dcast(tab_save[paste0(CompID, NumLabeledC) %in% good_isos[,paste0(CompID, NumLabeledC)] & Time == 56], CompID+Name+MSI+Formula+Strain ~ paste0("M+", NumLabeledC), value.var = c("MeanPeakArea", "SEPeakArea", "MID"))

names(tab_save)[grepl("MeanPeakArea", names(tab_save))] <- paste0(gsub("MeanPeakArea_", "", names(tab_save)[grepl("MeanPeakArea", names(tab_save))]), "_Mean Peak Area")
names(tab_save)[grepl("SEPeakArea", names(tab_save))] <- paste0(gsub("SEPeakArea_", "", names(tab_save)[grepl("SEPeakArea", names(tab_save))]), "_SE Peak Area")
names(tab_save)[grepl("MID", names(tab_save))] <- paste0(gsub("MID_", "", names(tab_save)[grepl("MID", names(tab_save))]), "_MID")
tab_save[,MolWt:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(foo[1])
})]
tab_save[,RT:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(foo[2])
})]
tab_save[,MSI:=gsub("MSI_", "", str_extract(MSI, "MSI_[0-9]"))]
tab_save[,CompID:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(paste(foo[c(1,2,3,5)], collapse = "_"))
})]
tab_save <- tab_save[,c("CompID", "Name", "MSI", "MolWt", "Formula", "RT", sort(names(tab_save)[6:44])), with=F]
fwrite(tab_save[order(MolWt)], file = paste0(outdir, "intracellular_arg_labeling_summary.tsv"), sep = "\t")

8 Figure S5 - Arginine SIRM extracellular

####################### Extracellular
rm(list = ls())
datadir <- "../SILArginine2022-03/"
outdir <- "figure_s6/"

hilic_extra <- readRDS(paste0(datadir, "SIL_Extra_HILIC_LowLevelDataAllProcessed.rds"))
hilic_extra[,MinFeatValue:=min(AreaFrac[AreaFrac != 0]), by = FeatID]
hilic_extra[,log10value:=log10(AreaFrac + 0.25*MinFeatValue)]

labeled_comps <- hilic_extra[,sum(AreaFrac), by=list(CompID, NumLabeledC)][NumLabeledC > 0 & V1 > 1e5, unique(CompID)]

frac_labeling <- hilic_extra[ArgGroup=="HR" & Strain != "c",list(sum(AreaFrac[NumLabeledC==0]), sum(AreaFrac[NumLabeledC != 0])), by=CompID]
frac_labeling[,LabelFrac:=V2/(V2+V1)]
top_labeled_comps <- frac_labeling[V2 > 1e5 & LabelFrac > 0.1, CompID]
top_labeled_comps2<- frac_labeling[V2 > 1e7 & V1+V2 > 5e7 & LabelFrac > 0.1, CompID]
top_labeled_comps3 <- frac_labeling[LabelFrac > 0.05, CompID]

median_dat <- hilic_extra[,median(AreaFrac), by = list(CompID, Formula, FeatID, NumLabeledC, Name, ArgGroup, Strain, TimePoint, Time)]
most_abund_iso <- median_dat[ArgGroup == "HR" & Strain == "2243",list(NumLabeledC[which.max(V1)], max(V1)), by = list(CompID, Formula)]

hilic_extra[,IonMode:=ifelse(grepl("Pos$", CompID), "Pos", "Neg")]

mean_dat <- hilic_extra[,list(mean(AreaFrac), sd(AreaFrac)/sqrt(length(AreaFrac)), mean(value)), 
                        by = list(CompID, FeatID, Formula, MSI, NumLabeledC, IonMode, ArgGroup, Strain, TimePoint, Time, Name)]
mean_dat_sub <- mean_dat[ArgGroup == "HR" & Strain == "2243" & FeatID %in% most_abund_iso[V1 != 0 & grepl("Pos$", CompID)][order(V2, decreasing = T)][1:3, paste0(CompID, "_ExRate", V1)]]
mean_dat_sub[,Name2:=factor(Name, levels = c("DL-Arginine", "L-(+)-Citrulline", "L(+)-Ornithine"))]
levels(mean_dat_sub$Name2) <- c("Arginine\n(6 labels)", "Citrulline\n(6 labels)", "Ornithine\n(5 labels)")
arg_plot <- ggplot(mean_dat_sub, aes(x = Time, y = V1, color = Name2)) + geom_point(size = 2) + geom_line(aes(group = FeatID)) + geom_errorbar(aes(ymin = V1-V2, ymax = V1+V2), width = 0) + scale_color_brewer(palette = "Set1", name = "") + ylab("Peak area in supernatant") + xlab("Time (hr)") + theme(axis.text = element_text(size = 10))

ggsave(arg_plot, file = paste0(outdir, "arg_labels.pdf"), width = 4, height = 2.6)

arg_plot

feat_data_wide <- dcast(mean_dat[Strain == "2243" & ArgGroup == "HR"], FeatID+CompID+Formula+Name+NumLabeledC~TimePoint, value.var = "V1")

max_vals <- hilic_extra[,.(MaxVal = max(AreaFrac)), by = FeatID]
feat_data_wide <- merge(feat_data_wide, max_vals, by = "FeatID", all.x = T)
feat_data_wide[,log2FCTP4_TP0:=log2((TP4+1000)/(TP0+1000))]


new_color_scale <- c("gray", brewer.pal(9, "YlGnBu"))

tot_vals <- feat_data_wide[,list(sum(TP0), sum(TP4)), by = CompID]
tot_vals[,log2FC:=log2((V2+1000)/(V1+1000))]
comp_order <- tot_vals[,log2FC[which.max(abs(log2FC))], by = CompID][order(V1), CompID]


top_comps2 <- feat_data_wide[(log2FCTP4_TP0 > 4|Name=="DL-Arginine") & MaxVal > 1e6 & NumLabeledC > 0, unique(CompID)]
top_comps2b <- top_comps2[!grepl("known", top_comps2)]
top_comps2b <- sort(top_comps2b)[c(1:5, 7:15, 17, 18, 20, 21, 22)]
top_comps3 <- intersect(top_comps2b, top_labeled_comps3)

comp_order2 <- feat_data_wide[CompID %in% top_comps2b & TP4 != 0, 
                              list(max(NumLabeledC), TP4[which.max(NumLabeledC)], NumLabeledC[which.max(TP4)]), by = Name][order(V3, V1, V2, decreasing = T), Name]
comp_order2 <- comp_order2[c(2, 1, 3:19)]
mean_dat[,NumLC2:=paste0("M+", NumLabeledC)]

hilic_extra_labels <- ggplot(mean_dat[ArgGroup == "HR" & Strain == "2243" & CompID %in% top_comps3 & !grepl("known", Name)], 
                             aes(x = factor(Time), y = V1, fill = factor(NumLC2))) + geom_bar(stat = "identity", color = "black") + 
  facet_wrap(~factor(Name, levels = comp_order2), ncol = 3, scales = "free_y") + 
  scale_fill_manual(values = new_color_scale, name = "Labeling Pattern") + 
  theme(axis.text = element_text(size = 7.5), strip.text = element_text(size = 9)) + xlab("Time (hr)") + 
  ylab("Mean Peak Area from Supernatant") + theme(strip.background = element_blank())

ggsave(hilic_extra_labels, file = paste0(outdir, "sil_arg_hilic_extra_barplots_v2.pdf"), width = 7, height = 6)

hilic_extra_labels

most_abund_comps_pos <- mean_dat[ArgGroup == "HR" & Strain == "2243" & grepl("Pos$", CompID), sum(V1), by=CompID][order(V1, decreasing = T)][1:12, CompID]
tot_signal_plot_extra <- ggplot(mean_dat[ArgGroup == "HR" & Strain == "2243" & CompID %in% most_abund_comps_pos & 
                                           NumLC2 %in% c("M+0", "M+5", "M+6")], aes(x = factor(Time), fill = factor(CompID, levels = most_abund_comps_pos), y = V1)) + 
  geom_bar(position = "stack", stat = "identity", color = "black") + facet_wrap(~NumLC2, nrow = 1) + 
  scale_fill_manual(values =  brewer.pal(12, "Set3"), name = "Compound ID") + ylab("Mean Peak Area") + xlab("Time (hr)") + 
  theme(legend.position = "bottom", strip.text = element_text(size = 12)) + guides(fill = guide_legend(ncol = 2))#+ guides(fill = "none")

ggsave(tot_signal_plot_extra, file = paste0(outdir, "totalSignalExtracellular.pdf"), width = 7, height = 5)

tot_signal_plot_extra

############# Supp table
tab_save <- mean_dat[(CompID %in% intersect(top_comps2, top_labeled_comps3)) & Strain != "c" & ArgGroup == "HR"][V1 != 0]
setnames(tab_save, c("V1", "V2"), c("MeanPeakArea", "SEPeakArea"))
tab_save[,MID:=MeanPeakArea/sum(MeanPeakArea), by = list(CompID, Strain, ArgGroup)]
good_isos <- tab_save[,sum(MeanPeakArea), by=list(CompID, NumLabeledC)][V1 > 50000]

tab_save <- dcast(tab_save[paste0(CompID, NumLabeledC) %in% good_isos[,paste0(CompID, NumLabeledC)] & Time == 56], CompID+Name+MSI+Formula+Strain ~ paste0("M+", NumLabeledC), value.var = c("MeanPeakArea", "SEPeakArea", "MID"))

names(tab_save)[grepl("MeanPeakArea", names(tab_save))] <- paste0(gsub("MeanPeakArea_", "", names(tab_save)[grepl("MeanPeakArea", names(tab_save))]), "_Mean Peak Area")
names(tab_save)[grepl("SEPeakArea", names(tab_save))] <- paste0(gsub("SEPeakArea_", "", names(tab_save)[grepl("SEPeakArea", names(tab_save))]), "_SE Peak Area")
names(tab_save)[grepl("MID", names(tab_save))] <- paste0(gsub("MID_", "", names(tab_save)[grepl("MID", names(tab_save))]), "_MID")
tab_save[,MolWt:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(foo[1])
})]
tab_save[,RT:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(foo[2])
})]
tab_save[,MSI:=gsub("MSI_", "", str_extract(MSI, "MSI_[0-9]"))]
tab_save[,CompID:=sapply(CompID, function(x){
  foo <- strsplit(x, split = "_")[[1]]
  return(paste(foo[c(1,2,3,5)], collapse = "_"))
})]
tab_save <- tab_save[,c("CompID", "Name", "MSI", "MolWt", "Formula", "RT", sort(names(tab_save)[6:44])), with=F]
fwrite(tab_save[order(MolWt)], file = paste0(outdir, "extracellular_arg_labeling_summary.tsv"), sep = "\t")